gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
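/* Illustration (assumed usage, not part of the original file): MODE_INDEX
   picks the per-mode slot of the cost arrays in struct processor_costs,
   so a cost query is expected to look roughly like

       cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   with QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3 and any other
   mode falling into the "other" slot 4.  The field name mult_init is an
   assumption based on the comments in the tables below; treat this as a
   sketch rather than the exact call.  */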
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
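/* Worked out from the assumption above: COSTS_N_INSNS (1) == 4 and a plain
   add is 2 bytes, so COSTS_N_BYTES (2) == 4 as well.  Byte counts and
   instruction counts therefore land on the same scale, which lets the
   size-tuning table below be compared directly with the speed tables.  */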
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
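/* How these tables are meant to be read (the 32/64-bit split is inferred
   from the 32-bit-only CPUs below, which leave their second entry as
   DUMMY_STRINGOP_ALGS): each *_memcpy[2] / *_memset[2] pair provides one
   stringop_algs entry for 32-bit code and one for 64-bit code, and every
   {max, alg, noalign} triple means "use ALG for blocks of up to MAX bytes",
   with -1 standing for "no upper bound".  So when tuning for size, memcpy
   and memset always expand to a one-byte rep prefix regardless of length.  */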
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
444 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
445 (we ensure the alignment). For small blocks an inline loop is still a
446 noticeable win, while for bigger blocks either rep movsl or rep movsb is
447 the way to go. Rep movsb apparently has a more expensive startup time in the
448 CPU, but past 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
679 /* For some reason, Athlon handles the REP prefix (relative to loops) better
680 than K8 does. Alignment becomes important after 8 bytes for memcpy and
681 after 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
756 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
757 small blocks it is better to use a loop. For large blocks, a libcall can
758 do non-temporal accesses and beat inline code considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
818 time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
843 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
844 very small blocks it is better to use a loop. For large blocks, a libcall can
845 do non-temporal accesses and beat inline code considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
912 time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
937 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
938 very small blocks it is better to use a loop. For large blocks, a libcall
939 can do non-temporal accesses and beat inline code considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1007 time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 4, /* cond_taken_branch_cost. */
1029 2, /* cond_not_taken_branch_cost. */
1030 };
1032 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1033 very small blocks it is better to use a loop. For large blocks, a libcall
1034 can do non-temporal accesses and beat inline code considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1103 time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 4, /* cond_taken_branch_cost. */
1125 2, /* cond_not_taken_branch_cost. */
1126 };
1129 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1130 very small blocks it is better to use a loop. For large blocks, a libcall
1131 can do non-temporal accesses and beat inline code considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1190 time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 4, /* cond_taken_branch_cost. */
1212 2, /* cond_not_taken_branch_cost. */
1213 };
1215 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1216 very small blocks it is better to use a loop. For large blocks, a libcall
1217 can do non-temporal accesses and beat inline code considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 4, /* cond_taken_branch_cost. */
1298 2, /* cond_not_taken_branch_cost. */
1299 };
1301 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1302 very small blocks it is better to use a loop. For large blocks, a libcall can
1303 do non-temporal accesses and beat inline code considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar_load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar_load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar_load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar_load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar_load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar_load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration lea is 2 cycles and more. With
1878 this cost however our current implementation of synth_mult results in
1879 use of unnecessary temporary registers causing regression on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar_load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration lea is 2 cycles and more. With
1965 this cost however our current implementation of synth_mult results in
1966 use of unnecessary temporary registers causing regression on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME: perhaps a more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar_load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
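/* These single-bit masks are OR-ed together to form the SELECTOR argument of
   the DEF_TUNE records pulled in below, describing which processors a given
   tuning applies to.  */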
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
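/* For illustration only (a sketch of the expected shape, not a verbatim
   entry): records in x86-tune.def look roughly like

     DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
	       m_PENT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
	       | m_INTEL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)

   so the include expands once into the name strings of
   ix86_tune_feature_names above and once into the processor-mask selectors
   of this table.  */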
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2119 /* In case the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code. */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits specified at
2388 command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling ABI specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing a given 64bit part of the argument.
2409 These represent classes as documented by the PS ABI, with the exception
2410 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2411 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (the upper half does contain padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256bit registers. The pass
2545 reuses the mode switching infrastructure by re-running the mode
2546 insertion pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options are matched first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
2871 /* Return true if profiling code should be emitted before the
2872 prologue; otherwise return false.
2873 Note: for x86 this interacts poorly with the "hotfix" prologue. */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string has the following form (or is a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This tells the compiler to use the following strategy for memset:
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
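/* A further illustrative example (the size thresholds here are made up;
   the algorithm names come from stringop.def):

     -mmemcpy-strategy=unrolled_loop:256:noalign,libcall:-1:noalign

   expands memcpy with an unrolled loop for expected sizes in [1, 256] and
   falls back to a library call for anything larger.  */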
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 if ((stringop_alg) i == rep_prefix_8_byte
2992 && !TARGET_64BIT)
2994 /* rep; movq isn't available in 32-bit code. */
2995 error ("stringop strategy name %s specified for option %s "
2996 "not supported for 32-bit code",
2997 alg_name,
2998 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2999 return;
3002 input_ranges[n].max = maxs;
3003 input_ranges[n].alg = (stringop_alg) i;
3004 if (!strcmp (align, "align"))
3005 input_ranges[n].noalign = false;
3006 else if (!strcmp (align, "noalign"))
3007 input_ranges[n].noalign = true;
3008 else
3010 error ("unknown alignment %s specified for option %s",
3011 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3012 return;
3014 n++;
3015 curr_range_str = next_range_str;
3017 while (curr_range_str);
3019 if (input_ranges[n - 1].max != -1)
3021 error ("the max value for the last size range should be -1"
3022 " for option %s",
3023 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3024 return;
3027 if (n > MAX_STRINGOP_ALGS)
3029 error ("too many size ranges specified in option %s",
3030 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3031 return;
3034 /* Now override the default algs array. */
3035 for (i = 0; i < n; i++)
3037 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3038 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3039 = input_ranges[i].alg;
3040 *const_cast<int *>(&default_algs->size[i].noalign)
3041 = input_ranges[i].noalign;
3046 /* Parse the -mtune-ctrl= option. When DUMP is true,
3047 print the features that are explicitly set. */
3049 static void
3050 parse_mtune_ctrl_str (bool dump)
3052 if (!ix86_tune_ctrl_string)
3053 return;
3055 char *next_feature_string = NULL;
3056 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3057 char *orig = curr_feature_string;
3058 int i;
3061 bool clear = false;
3063 next_feature_string = strchr (curr_feature_string, ',');
3064 if (next_feature_string)
3065 *next_feature_string++ = '\0';
3066 if (*curr_feature_string == '^')
3068 curr_feature_string++;
3069 clear = true;
3071 for (i = 0; i < X86_TUNE_LAST; i++)
3073 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3075 ix86_tune_features[i] = !clear;
3076 if (dump)
3077 fprintf (stderr, "Explicitly %s feature %s\n",
3078 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3079 break;
3082 if (i == X86_TUNE_LAST)
3083 error ("Unknown parameter to option -mtune-ctrl: %s",
3084 clear ? curr_feature_string - 1 : curr_feature_string);
3085 curr_feature_string = next_feature_string;
3087 while (curr_feature_string);
3088 free (orig);
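/* Example of the accepted -mtune-ctrl= syntax (added for illustration;
   the feature names here are hypothetical): -mtune-ctrl=feature_a,^feature_b
   sets feature_a and, because of the leading '^', clears feature_b; any
   name not found in ix86_tune_feature_names is diagnosed above.  */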
3091 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3092 processor type. */
3094 static void
3095 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3097 unsigned int ix86_tune_mask = 1u << ix86_tune;
3098 int i;
3100 for (i = 0; i < X86_TUNE_LAST; ++i)
3102 if (ix86_tune_no_default)
3103 ix86_tune_features[i] = 0;
3104 else
3105 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3108 if (dump)
3110 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3111 for (i = 0; i < X86_TUNE_LAST; i++)
3112 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3113 ix86_tune_features[i] ? "on" : "off");
3116 parse_mtune_ctrl_str (dump);
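/* Worked example for set_ix86_tune_features above (added for illustration;
   the numbers are hypothetical): if ix86_tune were processor number 3,
   ix86_tune_mask is 1u << 3 == 0x8, and a feature whose
   initial_ix86_tune_features entry is 0xc (bits 2 and 3 set) comes out
   enabled, because 0xc & 0x8 is nonzero.  */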
3120 /* Default align_* from the processor table. */
3122 static void
3123 ix86_default_align (struct gcc_options *opts)
3125 if (opts->x_align_loops == 0)
3127 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3128 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3130 if (opts->x_align_jumps == 0)
3132 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3133 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3135 if (opts->x_align_functions == 0)
3137 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3141 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3143 static void
3144 ix86_override_options_after_change (void)
3146 ix86_default_align (&global_options);
3149 /* Override various settings based on options. If MAIN_ARGS_P, the
3150 options are from the command line, otherwise they are from
3151 attributes. */
3153 static void
3154 ix86_option_override_internal (bool main_args_p,
3155 struct gcc_options *opts,
3156 struct gcc_options *opts_set)
3158 int i;
3159 unsigned int ix86_arch_mask;
3160 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3161 const char *prefix;
3162 const char *suffix;
3163 const char *sw;
3165 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3166 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3167 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3168 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3169 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3170 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3171 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3172 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3173 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3174 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3175 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3176 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3177 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3178 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3179 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3180 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3181 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3182 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3183 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3184 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3185 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3186 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3187 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3188 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3189 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3190 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3191 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3192 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3193 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3194 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3195 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3196 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3197 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3198 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3199 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3200 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3201 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3202 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3203 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3204 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3205 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3206 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3207 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3208 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3209 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3210 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3211 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3212 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3213 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3214 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3215 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3216 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3217 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3218 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3219 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3220 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3221 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3223 #define PTA_CORE2 \
3224 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3225 | PTA_CX16 | PTA_FXSR)
3226 #define PTA_NEHALEM \
3227 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3228 #define PTA_WESTMERE \
3229 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3230 #define PTA_SANDYBRIDGE \
3231 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3232 #define PTA_IVYBRIDGE \
3233 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3234 #define PTA_HASWELL \
3235 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3236 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3237 #define PTA_BROADWELL \
3238 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3239 #define PTA_KNL \
3240 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3241 #define PTA_BONNELL \
3242 (PTA_CORE2 | PTA_MOVBE)
3243 #define PTA_SILVERMONT \
3244 (PTA_WESTMERE | PTA_MOVBE)
3246 /* If this reaches 64, we need to widen the struct pta flags below. */
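/* Added note: the highest flag currently defined above is PTA_PCOMMIT at
   bit 56, so bits 57-63 of the HOST_WIDE_INT flags word are still free.  */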
3248 static struct pta
3250 const char *const name; /* processor name or nickname. */
3251 const enum processor_type processor;
3252 const enum attr_cpu schedule;
3253 const unsigned HOST_WIDE_INT flags;
3255 const processor_alias_table[] =
3257 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3258 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3259 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3260 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3261 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3262 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3263 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3264 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3265 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3266 PTA_MMX | PTA_SSE | PTA_FXSR},
3267 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3268 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3269 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3270 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3271 PTA_MMX | PTA_SSE | PTA_FXSR},
3272 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3273 PTA_MMX | PTA_SSE | PTA_FXSR},
3274 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3275 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3276 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3277 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3278 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3279 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3280 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3281 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3282 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3285 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3286 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3287 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3288 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3289 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3290 PTA_SANDYBRIDGE},
3291 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3292 PTA_SANDYBRIDGE},
3293 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3294 PTA_IVYBRIDGE},
3295 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3296 PTA_IVYBRIDGE},
3297 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3298 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3299 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3300 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3301 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3302 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3303 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3304 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3305 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3306 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3307 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3308 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3309 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3310 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3311 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3312 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3313 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3314 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3315 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3316 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3317 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3318 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3319 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3320 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3321 {"x86-64", PROCESSOR_K8, CPU_K8,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3323 {"k8", PROCESSOR_K8, CPU_K8,
3324 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3325 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3326 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3327 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3328 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3329 {"opteron", PROCESSOR_K8, CPU_K8,
3330 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3331 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3332 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3333 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3334 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3335 {"athlon64", PROCESSOR_K8, CPU_K8,
3336 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3337 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3338 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3339 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3340 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3341 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3342 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3343 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3344 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3345 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3346 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3347 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3348 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3349 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3350 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3351 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3352 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3353 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3354 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3355 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3356 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3357 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3358 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3359 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3360 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3361 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3362 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3363 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3364 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3365 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3366 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3367 | PTA_XSAVEOPT | PTA_FSGSBASE},
3368 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3369 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3370 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3371 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3372 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3373 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3374 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3375 | PTA_MOVBE},
3376 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3377 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3378 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3379 | PTA_FXSR | PTA_XSAVE},
3380 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3381 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3382 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3383 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3384 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3385 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3387 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3388 PTA_64BIT
3389 | PTA_HLE /* flags are only used for -march switch. */ },
3392 /* -mrecip options. */
3393 static struct
3395 const char *string; /* option name */
3396 unsigned int mask; /* mask bits to set */
3398 const recip_options[] =
3400 { "all", RECIP_MASK_ALL },
3401 { "none", RECIP_MASK_NONE },
3402 { "div", RECIP_MASK_DIV },
3403 { "sqrt", RECIP_MASK_SQRT },
3404 { "vec-div", RECIP_MASK_VEC_DIV },
3405 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3408 int const pta_size = ARRAY_SIZE (processor_alias_table);
3410 /* Set up prefix/suffix so the error messages refer to either the command
3411 line argument, or the attribute(target). */
3412 if (main_args_p)
3414 prefix = "-m";
3415 suffix = "";
3416 sw = "switch";
3418 else
3420 prefix = "option(\"";
3421 suffix = "\")";
3422 sw = "attribute";
3425 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3426 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3427 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3428 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3429 #ifdef TARGET_BI_ARCH
3430 else
3432 #if TARGET_BI_ARCH == 1
3433 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3434 is on and OPTION_MASK_ABI_X32 is off. We turn off
3435 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3436 -mx32. */
3437 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3438 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3439 #else
3440 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3441 on and OPTION_MASK_ABI_64 is off. We turn off
3442 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3443 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3444 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3445 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3446 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3447 #endif
3449 #endif
3451 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3453 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3454 OPTION_MASK_ABI_64 for TARGET_X32. */
3455 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3456 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3458 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3459 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3460 | OPTION_MASK_ABI_X32
3461 | OPTION_MASK_ABI_64);
3462 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3464 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3465 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3466 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3467 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3470 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3471 SUBTARGET_OVERRIDE_OPTIONS;
3472 #endif
3474 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3475 SUBSUBTARGET_OVERRIDE_OPTIONS;
3476 #endif
3478 /* -fPIC is the default for 64-bit Darwin (Mach-O). */
3479 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 opts->x_flag_pic = 2;
3482 /* Need to check -mtune=generic first. */
3483 if (opts->x_ix86_tune_string)
3485 /* As special support for cross compilers we read -mtune=native
3486 as -mtune=generic. With native compilers we won't see the
3487 -mtune=native, as it was changed by the driver. */
3488 if (!strcmp (opts->x_ix86_tune_string, "native"))
3490 opts->x_ix86_tune_string = "generic";
3492 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3493 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3494 "%stune=k8%s or %stune=generic%s instead as appropriate",
3495 prefix, suffix, prefix, suffix, prefix, suffix);
3497 else
3499 if (opts->x_ix86_arch_string)
3500 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3501 if (!opts->x_ix86_tune_string)
3503 opts->x_ix86_tune_string
3504 = processor_target_table[TARGET_CPU_DEFAULT].name;
3505 ix86_tune_defaulted = 1;
3508 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3509 or defaulted. We need to use a sensible tune option. */
3510 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3512 opts->x_ix86_tune_string = "generic";
3516 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3517 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3519 /* rep; movq isn't available in 32-bit code. */
3520 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3521 opts->x_ix86_stringop_alg = no_stringop;
3524 if (!opts->x_ix86_arch_string)
3525 opts->x_ix86_arch_string
3526 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3528 else
3529 ix86_arch_specified = 1;
3531 if (opts_set->x_ix86_pmode)
3533 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3534 && opts->x_ix86_pmode == PMODE_SI)
3535 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3536 && opts->x_ix86_pmode == PMODE_DI))
3537 error ("address mode %qs not supported in the %s bit mode",
3538 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3539 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3541 else
3542 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3543 ? PMODE_DI : PMODE_SI;
3545 if (!opts_set->x_ix86_abi)
3546 opts->x_ix86_abi = DEFAULT_ABI;
3548 /* For targets using the MS ABI, enable MS extensions unless they are
3549 explicitly turned off. For non-MS ABIs we turn this
3550 option off. */
3551 if (!opts_set->x_flag_ms_extensions)
3552 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3554 if (opts_set->x_ix86_cmodel)
3556 switch (opts->x_ix86_cmodel)
3558 case CM_SMALL:
3559 case CM_SMALL_PIC:
3560 if (opts->x_flag_pic)
3561 opts->x_ix86_cmodel = CM_SMALL_PIC;
3562 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3563 error ("code model %qs not supported in the %s bit mode",
3564 "small", "32");
3565 break;
3567 case CM_MEDIUM:
3568 case CM_MEDIUM_PIC:
3569 if (opts->x_flag_pic)
3570 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3571 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3572 error ("code model %qs not supported in the %s bit mode",
3573 "medium", "32");
3574 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3575 error ("code model %qs not supported in x32 mode",
3576 "medium");
3577 break;
3579 case CM_LARGE:
3580 case CM_LARGE_PIC:
3581 if (opts->x_flag_pic)
3582 opts->x_ix86_cmodel = CM_LARGE_PIC;
3583 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 error ("code model %qs not supported in the %s bit mode",
3585 "large", "32");
3586 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3587 error ("code model %qs not supported in x32 mode",
3588 "large");
3589 break;
3591 case CM_32:
3592 if (opts->x_flag_pic)
3593 error ("code model %s does not support PIC mode", "32");
3594 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3595 error ("code model %qs not supported in the %s bit mode",
3596 "32", "64");
3597 break;
3599 case CM_KERNEL:
3600 if (opts->x_flag_pic)
3602 error ("code model %s does not support PIC mode", "kernel");
3603 opts->x_ix86_cmodel = CM_32;
3605 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3606 error ("code model %qs not supported in the %s bit mode",
3607 "kernel", "32");
3608 break;
3610 default:
3611 gcc_unreachable ();
3614 else
3616 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3617 use of rip-relative addressing. This eliminates fixups that
3618 would otherwise be needed if this object is to be placed in a
3619 DLL, and is essentially just as efficient as direct addressing. */
3620 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3621 && (TARGET_RDOS || TARGET_PECOFF))
3622 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3623 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3624 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3625 else
3626 opts->x_ix86_cmodel = CM_32;
3628 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3630 error ("-masm=intel not supported in this configuration");
3631 opts->x_ix86_asm_dialect = ASM_ATT;
3633 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3634 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3635 sorry ("%i-bit mode not compiled in",
3636 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3638 for (i = 0; i < pta_size; i++)
3639 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3641 ix86_schedule = processor_alias_table[i].schedule;
3642 ix86_arch = processor_alias_table[i].processor;
3643 /* Default cpu tuning to the architecture. */
3644 ix86_tune = ix86_arch;
3646 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3647 && !(processor_alias_table[i].flags & PTA_64BIT))
3648 error ("CPU you selected does not support x86-64 "
3649 "instruction set");
3651 if (processor_alias_table[i].flags & PTA_MMX
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3654 if (processor_alias_table[i].flags & PTA_3DNOW
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3657 if (processor_alias_table[i].flags & PTA_3DNOW_A
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3660 if (processor_alias_table[i].flags & PTA_SSE
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3663 if (processor_alias_table[i].flags & PTA_SSE2
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3666 if (processor_alias_table[i].flags & PTA_SSE3
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3669 if (processor_alias_table[i].flags & PTA_SSSE3
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3672 if (processor_alias_table[i].flags & PTA_SSE4_1
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3675 if (processor_alias_table[i].flags & PTA_SSE4_2
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3678 if (processor_alias_table[i].flags & PTA_AVX
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3681 if (processor_alias_table[i].flags & PTA_AVX2
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3684 if (processor_alias_table[i].flags & PTA_FMA
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3687 if (processor_alias_table[i].flags & PTA_SSE4A
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3690 if (processor_alias_table[i].flags & PTA_FMA4
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3693 if (processor_alias_table[i].flags & PTA_XOP
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3696 if (processor_alias_table[i].flags & PTA_LWP
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3699 if (processor_alias_table[i].flags & PTA_ABM
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3702 if (processor_alias_table[i].flags & PTA_BMI
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3705 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3708 if (processor_alias_table[i].flags & PTA_TBM
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3711 if (processor_alias_table[i].flags & PTA_BMI2
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3714 if (processor_alias_table[i].flags & PTA_CX16
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3717 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3720 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3721 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3722 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3723 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3724 if (processor_alias_table[i].flags & PTA_MOVBE
3725 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3726 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3727 if (processor_alias_table[i].flags & PTA_AES
3728 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3729 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3730 if (processor_alias_table[i].flags & PTA_SHA
3731 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3732 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3733 if (processor_alias_table[i].flags & PTA_PCLMUL
3734 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3735 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3736 if (processor_alias_table[i].flags & PTA_FSGSBASE
3737 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3738 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3739 if (processor_alias_table[i].flags & PTA_RDRND
3740 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3741 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3742 if (processor_alias_table[i].flags & PTA_F16C
3743 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3744 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3745 if (processor_alias_table[i].flags & PTA_RTM
3746 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3747 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3748 if (processor_alias_table[i].flags & PTA_HLE
3749 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3750 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3751 if (processor_alias_table[i].flags & PTA_PRFCHW
3752 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3753 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3754 if (processor_alias_table[i].flags & PTA_RDSEED
3755 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3756 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3757 if (processor_alias_table[i].flags & PTA_ADX
3758 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3759 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3760 if (processor_alias_table[i].flags & PTA_FXSR
3761 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3762 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3763 if (processor_alias_table[i].flags & PTA_XSAVE
3764 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3765 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3766 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3767 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3768 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3769 if (processor_alias_table[i].flags & PTA_AVX512F
3770 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3771 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3772 if (processor_alias_table[i].flags & PTA_AVX512ER
3773 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3774 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3775 if (processor_alias_table[i].flags & PTA_AVX512PF
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3778 if (processor_alias_table[i].flags & PTA_AVX512CD
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3781 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3782 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3783 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3784 if (processor_alias_table[i].flags & PTA_PCOMMIT
3785 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3786 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3787 if (processor_alias_table[i].flags & PTA_CLWB
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3790 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3793 if (processor_alias_table[i].flags & PTA_XSAVEC
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3796 if (processor_alias_table[i].flags & PTA_XSAVES
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3799 if (processor_alias_table[i].flags & PTA_AVX512DQ
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3802 if (processor_alias_table[i].flags & PTA_AVX512BW
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3805 if (processor_alias_table[i].flags & PTA_AVX512VL
3806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3808 if (processor_alias_table[i].flags & PTA_MPX
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3811 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3814 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3817 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3818 x86_prefetch_sse = true;
3820 break;
3823 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3824 error ("Intel MPX does not support x32");
3829 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3830 error ("generic CPU can be used only for %stune=%s %s",
3831 prefix, suffix, sw);
3832 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3833 error ("intel CPU can be used only for %stune=%s %s",
3834 prefix, suffix, sw);
3835 else if (i == pta_size)
3836 error ("bad value (%s) for %sarch=%s %s",
3837 opts->x_ix86_arch_string, prefix, suffix, sw);
3839 ix86_arch_mask = 1u << ix86_arch;
3840 for (i = 0; i < X86_ARCH_LAST; ++i)
3841 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3843 for (i = 0; i < pta_size; i++)
3844 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3846 ix86_schedule = processor_alias_table[i].schedule;
3847 ix86_tune = processor_alias_table[i].processor;
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3850 if (!(processor_alias_table[i].flags & PTA_64BIT))
3852 if (ix86_tune_defaulted)
3854 opts->x_ix86_tune_string = "x86-64";
3855 for (i = 0; i < pta_size; i++)
3856 if (! strcmp (opts->x_ix86_tune_string,
3857 processor_alias_table[i].name))
3858 break;
3859 ix86_schedule = processor_alias_table[i].schedule;
3860 ix86_tune = processor_alias_table[i].processor;
3862 else
3863 error ("CPU you selected does not support x86-64 "
3864 "instruction set");
3867 /* Intel CPUs have always interpreted SSE prefetch instructions as
3868 NOPs; so, we can enable SSE prefetch instructions even when
3869 -mtune (rather than -march) points us to a processor that has them.
3870 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3871 higher processors. */
3872 if (TARGET_CMOV
3873 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3874 x86_prefetch_sse = true;
3875 break;
3878 if (ix86_tune_specified && i == pta_size)
3879 error ("bad value (%s) for %stune=%s %s",
3880 opts->x_ix86_tune_string, prefix, suffix, sw);
3882 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3884 #ifndef USE_IX86_FRAME_POINTER
3885 #define USE_IX86_FRAME_POINTER 0
3886 #endif
3888 #ifndef USE_X86_64_FRAME_POINTER
3889 #define USE_X86_64_FRAME_POINTER 0
3890 #endif
3892 /* Set the default values for switches whose default depends on TARGET_64BIT
3893 in case they weren't overwritten by command line options. */
3894 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3896 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3897 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3898 if (opts->x_flag_asynchronous_unwind_tables
3899 && !opts_set->x_flag_unwind_tables
3900 && TARGET_64BIT_MS_ABI)
3901 opts->x_flag_unwind_tables = 1;
3902 if (opts->x_flag_asynchronous_unwind_tables == 2)
3903 opts->x_flag_unwind_tables
3904 = opts->x_flag_asynchronous_unwind_tables = 1;
3905 if (opts->x_flag_pcc_struct_return == 2)
3906 opts->x_flag_pcc_struct_return = 0;
3908 else
3910 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3911 opts->x_flag_omit_frame_pointer
3912 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3913 if (opts->x_flag_asynchronous_unwind_tables == 2)
3914 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3915 if (opts->x_flag_pcc_struct_return == 2)
3916 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3919 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3920 /* TODO: ix86_cost should be chosen at instruction or function granularity
3921 so that for cold code we can use size_cost even in !optimize_size compilation. */
3922 if (opts->x_optimize_size)
3923 ix86_cost = &ix86_size_cost;
3924 else
3925 ix86_cost = ix86_tune_cost;
3927 /* Arrange to set up i386_stack_locals for all functions. */
3928 init_machine_status = ix86_init_machine_status;
3930 /* Validate -mregparm= value. */
3931 if (opts_set->x_ix86_regparm)
3933 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3934 warning (0, "-mregparm is ignored in 64-bit mode");
3935 if (opts->x_ix86_regparm > REGPARM_MAX)
3937 error ("-mregparm=%d is not between 0 and %d",
3938 opts->x_ix86_regparm, REGPARM_MAX);
3939 opts->x_ix86_regparm = 0;
3942 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3943 opts->x_ix86_regparm = REGPARM_MAX;
3945 /* Default align_* from the processor table. */
3946 ix86_default_align (opts);
3948 /* Provide default for -mbranch-cost= value. */
3949 if (!opts_set->x_ix86_branch_cost)
3950 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3952 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3954 opts->x_target_flags
3955 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3957 /* Enable by default the SSE and MMX builtins. Do allow the user to
3958 explicitly disable any of these. In particular, disabling SSE and
3959 MMX for kernel code is extremely useful. */
3960 if (!ix86_arch_specified)
3961 opts->x_ix86_isa_flags
3962 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3963 | TARGET_SUBTARGET64_ISA_DEFAULT)
3964 & ~opts->x_ix86_isa_flags_explicit);
3966 if (TARGET_RTD_P (opts->x_target_flags))
3967 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3969 else
3971 opts->x_target_flags
3972 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3974 if (!ix86_arch_specified)
3975 opts->x_ix86_isa_flags
3976 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3978 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3979 when the programmer takes care to keep the stack from being destroyed. */
3980 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3981 opts->x_target_flags |= MASK_NO_RED_ZONE;
3984 /* Keep nonleaf frame pointers. */
3985 if (opts->x_flag_omit_frame_pointer)
3986 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3987 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3988 opts->x_flag_omit_frame_pointer = 1;
3990 /* If we're doing fast math, we don't care about comparison order
3991 wrt NaNs. This lets us use a shorter comparison sequence. */
3992 if (opts->x_flag_finite_math_only)
3993 opts->x_target_flags &= ~MASK_IEEE_FP;
3995 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3996 since the insns won't need emulation. */
3997 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3998 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4000 /* Likewise, if the target doesn't have a 387, or we've specified
4001 software floating point, don't use 387 inline intrinsics. */
4002 if (!TARGET_80387_P (opts->x_target_flags))
4003 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4005 /* Turn on MMX builtins for -msse. */
4006 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4007 opts->x_ix86_isa_flags
4008 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4010 /* Enable SSE prefetch. */
4011 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4012 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4013 x86_prefetch_sse = true;
4015 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4016 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4017 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4018 opts->x_ix86_isa_flags
4019 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4021 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4022 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4023 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4024 opts->x_ix86_isa_flags
4025 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4027 /* Enable lzcnt instruction for -mabm. */
4028 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4029 opts->x_ix86_isa_flags
4030 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4032 /* Validate -mpreferred-stack-boundary= value or default it to
4033 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4034 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4035 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4037 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4038 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4039 int max = (TARGET_SEH ? 4 : 12);
4041 if (opts->x_ix86_preferred_stack_boundary_arg < min
4042 || opts->x_ix86_preferred_stack_boundary_arg > max)
4044 if (min == max)
4045 error ("-mpreferred-stack-boundary is not supported "
4046 "for this target");
4047 else
4048 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4049 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4051 else
4052 ix86_preferred_stack_boundary
4053 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
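/* Worked example (added for illustration): -mpreferred-stack-boundary=4
   gives (1 << 4) == 16 bytes, i.e. 16 * BITS_PER_UNIT == 128 bits, the
   usual 16-byte stack alignment required by the 64-bit psABI.  */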
4056 /* Set the default value for -mstackrealign. */
4057 if (opts->x_ix86_force_align_arg_pointer == -1)
4058 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4060 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4062 /* Validate -mincoming-stack-boundary= value or default it to
4063 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4064 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4065 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4067 if (opts->x_ix86_incoming_stack_boundary_arg
4068 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4069 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4070 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4071 opts->x_ix86_incoming_stack_boundary_arg,
4072 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4073 else
4075 ix86_user_incoming_stack_boundary
4076 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4077 ix86_incoming_stack_boundary
4078 = ix86_user_incoming_stack_boundary;
4082 #ifndef NO_PROFILE_COUNTERS
4083 if (flag_nop_mcount)
4084 error ("-mnop-mcount is not compatible with this target");
4085 #endif
4086 if (flag_nop_mcount && flag_pic)
4087 error ("-mnop-mcount is not implemented for -fPIC");
4089 /* Accept -msseregparm only if at least SSE support is enabled. */
4090 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4091 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4094 if (opts_set->x_ix86_fpmath)
4096 if (opts->x_ix86_fpmath & FPMATH_SSE)
4098 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4100 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4101 opts->x_ix86_fpmath = FPMATH_387;
4103 else if ((opts->x_ix86_fpmath & FPMATH_387)
4104 && !TARGET_80387_P (opts->x_target_flags))
4106 warning (0, "387 instruction set disabled, using SSE arithmetics");
4107 opts->x_ix86_fpmath = FPMATH_SSE;
4111 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4112 -mfpmath=387. The latter is nevertheless the default on many targets, since the
4113 extra 80-bit precision of temporaries is considered to be part of the ABI.
4114 Override the default at least for -ffast-math.
4115 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4116 smaller binaries. It is however not clear whether register allocation is
4117 ready for this setting.
4118 Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%) than SSE
4119 codegen. We may switch to 387 with -ffast-math for size-optimized
4120 functions. */
4121 else if (fast_math_flags_set_p (&global_options)
4122 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4123 opts->x_ix86_fpmath = FPMATH_SSE;
4124 else
4125 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4127 /* If the i387 is disabled, then do not return values in it. */
4128 if (!TARGET_80387_P (opts->x_target_flags))
4129 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4131 /* Use an external vectorized library when vectorizing calls to intrinsics. */
4132 if (opts_set->x_ix86_veclibabi_type)
4133 switch (opts->x_ix86_veclibabi_type)
4135 case ix86_veclibabi_type_svml:
4136 ix86_veclib_handler = ix86_veclibabi_svml;
4137 break;
4139 case ix86_veclibabi_type_acml:
4140 ix86_veclib_handler = ix86_veclibabi_acml;
4141 break;
4143 default:
4144 gcc_unreachable ();
4147 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4148 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4149 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4151 /* If stack probes are required, the space used for large function
4152 arguments on the stack must also be probed, so enable
4153 -maccumulate-outgoing-args so this happens in the prologue. */
4154 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4155 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4157 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4158 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4159 "for correctness", prefix, suffix);
4160 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4163 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4165 char *p;
4166 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4167 p = strchr (internal_label_prefix, 'X');
4168 internal_label_prefix_len = p - internal_label_prefix;
4169 *p = '\0';
4172 /* When no scheduling description is available, disable the scheduler pass
4173 so it won't slow down the compilation and make x87 code slower. */
4174 if (!TARGET_SCHEDULE)
4175 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4177 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4178 ix86_tune_cost->simultaneous_prefetches,
4179 opts->x_param_values,
4180 opts_set->x_param_values);
4181 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4182 ix86_tune_cost->prefetch_block,
4183 opts->x_param_values,
4184 opts_set->x_param_values);
4185 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4186 ix86_tune_cost->l1_cache_size,
4187 opts->x_param_values,
4188 opts_set->x_param_values);
4189 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4190 ix86_tune_cost->l2_cache_size,
4191 opts->x_param_values,
4192 opts_set->x_param_values);
4194 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4195 if (opts->x_flag_prefetch_loop_arrays < 0
4196 && HAVE_prefetch
4197 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4198 && !opts->x_optimize_size
4199 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4200 opts->x_flag_prefetch_loop_arrays = 1;
4202 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4203 can be optimized to ap = __builtin_next_arg (0). */
4204 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4205 targetm.expand_builtin_va_start = NULL;
4207 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209 ix86_gen_leave = gen_leave_rex64;
4210 if (Pmode == DImode)
4212 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4213 ix86_gen_tls_local_dynamic_base_64
4214 = gen_tls_local_dynamic_base_64_di;
4216 else
4218 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4219 ix86_gen_tls_local_dynamic_base_64
4220 = gen_tls_local_dynamic_base_64_si;
4223 else
4224 ix86_gen_leave = gen_leave;
4226 if (Pmode == DImode)
4228 ix86_gen_add3 = gen_adddi3;
4229 ix86_gen_sub3 = gen_subdi3;
4230 ix86_gen_sub3_carry = gen_subdi3_carry;
4231 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4232 ix86_gen_andsp = gen_anddi3;
4233 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4234 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4235 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4236 ix86_gen_monitor = gen_sse3_monitor_di;
4238 else
4240 ix86_gen_add3 = gen_addsi3;
4241 ix86_gen_sub3 = gen_subsi3;
4242 ix86_gen_sub3_carry = gen_subsi3_carry;
4243 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4244 ix86_gen_andsp = gen_andsi3;
4245 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4246 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4247 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4248 ix86_gen_monitor = gen_sse3_monitor_si;
4251 #ifdef USE_IX86_CLD
4252 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4253 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4254 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4255 #endif
4257 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4259 if (opts->x_flag_fentry > 0)
4260 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4261 "with -fpic");
4262 opts->x_flag_fentry = 0;
4264 else if (TARGET_SEH)
4266 if (opts->x_flag_fentry == 0)
4267 sorry ("-mno-fentry isn%'t compatible with SEH");
4268 opts->x_flag_fentry = 1;
4270 else if (opts->x_flag_fentry < 0)
4272 #if defined(PROFILE_BEFORE_PROLOGUE)
4273 opts->x_flag_fentry = 1;
4274 #else
4275 opts->x_flag_fentry = 0;
4276 #endif
4279 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4280 opts->x_target_flags |= MASK_VZEROUPPER;
4281 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4282 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4283 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4284 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4285 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4286 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4287 /* Enable 128-bit AVX instruction generation
4288 for the auto-vectorizer. */
4289 if (TARGET_AVX128_OPTIMAL
4290 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4291 opts->x_target_flags |= MASK_PREFER_AVX128;
4293 if (opts->x_ix86_recip_name)
4295 char *p = ASTRDUP (opts->x_ix86_recip_name);
4296 char *q;
4297 unsigned int mask, i;
4298 bool invert;
4300 while ((q = strtok (p, ",")) != NULL)
4302 p = NULL;
4303 if (*q == '!')
4305 invert = true;
4306 q++;
4308 else
4309 invert = false;
4311 if (!strcmp (q, "default"))
4312 mask = RECIP_MASK_ALL;
4313 else
4315 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4316 if (!strcmp (q, recip_options[i].string))
4318 mask = recip_options[i].mask;
4319 break;
4322 if (i == ARRAY_SIZE (recip_options))
4324 error ("unknown option for -mrecip=%s", q);
4325 invert = false;
4326 mask = RECIP_MASK_NONE;
4330 opts->x_recip_mask_explicit |= mask;
4331 if (invert)
4332 opts->x_recip_mask &= ~mask;
4333 else
4334 opts->x_recip_mask |= mask;
4338 if (TARGET_RECIP_P (opts->x_target_flags))
4339 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4340 else if (opts_set->x_target_flags & MASK_RECIP)
4341 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
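/* Worked example (added for illustration): with -mrecip=all,!sqrt the
   parsing loop above first ORs RECIP_MASK_ALL into x_recip_mask and then,
   because of the leading '!', clears RECIP_MASK_SQRT again; both masks are
   also recorded in x_recip_mask_explicit, so the -mrecip/-mno-recip
   defaulting just above leaves these explicit choices alone.  */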
4343 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4344 for 64-bit Bionic. */
4345 if (TARGET_HAS_BIONIC
4346 && !(opts_set->x_target_flags
4347 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4348 opts->x_target_flags |= (TARGET_64BIT
4349 ? MASK_LONG_DOUBLE_128
4350 : MASK_LONG_DOUBLE_64);
4352 /* Only one of them can be active. */
4353 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4354 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4356 /* Save the initial options in case the user uses function-specific
4357 options. */
4358 if (main_args_p)
4359 target_option_default_node = target_option_current_node
4360 = build_target_option_node (opts);
4362 /* Handle stack protector */
4363 if (!opts_set->x_ix86_stack_protector_guard)
4364 opts->x_ix86_stack_protector_guard
4365 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4367 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4368 if (opts->x_ix86_tune_memcpy_strategy)
4370 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4371 ix86_parse_stringop_strategy_string (str, false);
4372 free (str);
4375 if (opts->x_ix86_tune_memset_strategy)
4377 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4378 ix86_parse_stringop_strategy_string (str, true);
4379 free (str);
4383 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4385 static void
4386 ix86_option_override (void)
4388 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4389 struct register_pass_info insert_vzeroupper_info
4390 = { pass_insert_vzeroupper, "reload",
4391 1, PASS_POS_INSERT_AFTER
4394 ix86_option_override_internal (true, &global_options, &global_options_set);
4397 /* This needs to be done at start up. It's convenient to do it here. */
4398 register_pass (&insert_vzeroupper_info);
4401 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4402 static char *
4403 ix86_offload_options (void)
4405 if (TARGET_LP64)
4406 return xstrdup ("-foffload-abi=lp64");
4407 return xstrdup ("-foffload-abi=ilp32");
4410 /* Update register usage after having seen the compiler flags. */
4412 static void
4413 ix86_conditional_register_usage (void)
4415 int i, c_mask;
4417 /* For 32-bit targets, squash the REX registers. */
4418 if (! TARGET_64BIT)
4420 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4421 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4422 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4423 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4424 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4428 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4429 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4430 : TARGET_64BIT ? (1 << 2)
4431 : (1 << 1));
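/* Worked example (added for illustration; the entry value is hypothetical):
   with c_mask == (1 << 2) for 64-bit non-MS code, a CALL_USED_REGISTERS
   entry of 6 (bits 1 and 2 set) is reduced by the loop below to 1, i.e.
   call-used, while under the MS ABI mask (1 << 3) the same entry becomes
   0, i.e. call-saved.  */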
4433 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4437 /* Set/reset conditionally defined registers from
4438 CALL_USED_REGISTERS initializer. */
4439 if (call_used_regs[i] > 1)
4440 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4442 /* Compute the CLOBBERED_REGS register set as the call-used
4443 registers within the GENERAL_REGS register set. */
4444 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4445 && call_used_regs[i])
4446 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4449 /* If MMX is disabled, squash the registers. */
4450 if (! TARGET_MMX)
4451 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4452 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4453 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4455 /* If SSE is disabled, squash the registers. */
4456 if (! TARGET_SSE)
4457 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4458 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4459 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4461 /* If the FPU is disabled, squash the registers. */
4462 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4463 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4464 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4465 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4467 /* If AVX512F is disabled, squash the registers. */
4468 if (! TARGET_AVX512F)
4470 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4471 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4473 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4474 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4477 /* If MPX is disabled, squash the registers. */
4478 if (! TARGET_MPX)
4479 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4480 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4484 /* Save the current options */
4486 static void
4487 ix86_function_specific_save (struct cl_target_option *ptr,
4488 struct gcc_options *opts)
4490 ptr->arch = ix86_arch;
4491 ptr->schedule = ix86_schedule;
4492 ptr->prefetch_sse = x86_prefetch_sse;
4493 ptr->tune = ix86_tune;
4494 ptr->branch_cost = ix86_branch_cost;
4495 ptr->tune_defaulted = ix86_tune_defaulted;
4496 ptr->arch_specified = ix86_arch_specified;
4497 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4498 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4499 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4500 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4501 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4502 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4503 ptr->x_ix86_abi = opts->x_ix86_abi;
4504 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4505 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4506 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4507 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4508 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4509 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4510 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4511 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4512 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4513 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4514 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4515 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4516 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4517 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4518 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4519 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4520 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4521 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4522 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4523 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4525 /* The fields are char but the variables are not; make sure the
4526 values fit in the fields. */
4527 gcc_assert (ptr->arch == ix86_arch);
4528 gcc_assert (ptr->schedule == ix86_schedule);
4529 gcc_assert (ptr->tune == ix86_tune);
4530 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4533 /* Restore the current options */
4535 static void
4536 ix86_function_specific_restore (struct gcc_options *opts,
4537 struct cl_target_option *ptr)
4539 enum processor_type old_tune = ix86_tune;
4540 enum processor_type old_arch = ix86_arch;
4541 unsigned int ix86_arch_mask;
4542 int i;
4544 /* We don't change -fPIC. */
4545 opts->x_flag_pic = flag_pic;
4547 ix86_arch = (enum processor_type) ptr->arch;
4548 ix86_schedule = (enum attr_cpu) ptr->schedule;
4549 ix86_tune = (enum processor_type) ptr->tune;
4550 x86_prefetch_sse = ptr->prefetch_sse;
4551 opts->x_ix86_branch_cost = ptr->branch_cost;
4552 ix86_tune_defaulted = ptr->tune_defaulted;
4553 ix86_arch_specified = ptr->arch_specified;
4554 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4555 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4556 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4557 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4558 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4559 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4560 opts->x_ix86_abi = ptr->x_ix86_abi;
4561 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4562 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4563 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4564 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4565 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4566 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4567 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4568 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4569 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4570 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4571 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4572 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4573 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4574 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4575 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4576 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4577 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4578 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4579 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4580 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4581 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4582 /* TODO: ix86_cost should be chosen at instruction or function granularity
4583 so for cold code we use size_cost even in !optimize_size compilation. */
4584 if (opts->x_optimize_size)
4585 ix86_cost = &ix86_size_cost;
4586 else
4587 ix86_cost = ix86_tune_cost;
4589 /* Recreate the arch feature tests if the arch changed */
4590 if (old_arch != ix86_arch)
4592 ix86_arch_mask = 1u << ix86_arch;
4593 for (i = 0; i < X86_ARCH_LAST; ++i)
4594 ix86_arch_features[i]
4595 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4598 /* Recreate the tune optimization tests */
4599 if (old_tune != ix86_tune)
4600 set_ix86_tune_features (ix86_tune, false);
4603 /* Adjust target options after streaming them in. This is mainly about
4604 reconciling them with global options. */
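/* For example, if a function was streamed out under -mcmodel=medium and the
   LTO link-time compilation adds -fPIC, the saved CM_MEDIUM is rewritten to
   CM_MEDIUM_PIC below so it stays consistent with the global flag_pic.  */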
4606 static void
4607 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4609 /* flag_pic is a global option, but ix86_cmodel is target saved option
4610 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4611 for PIC, or error out. */
4612 if (flag_pic)
4613 switch (ptr->x_ix86_cmodel)
4615 case CM_SMALL:
4616 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4617 break;
4619 case CM_MEDIUM:
4620 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4621 break;
4623 case CM_LARGE:
4624 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4625 break;
4627 case CM_KERNEL:
4628 error ("code model %s does not support PIC mode", "kernel");
4629 break;
4631 default:
4632 break;
4634 else
4635 switch (ptr->x_ix86_cmodel)
4637 case CM_SMALL_PIC:
4638 ptr->x_ix86_cmodel = CM_SMALL;
4639 break;
4641 case CM_MEDIUM_PIC:
4642 ptr->x_ix86_cmodel = CM_MEDIUM;
4643 break;
4645 case CM_LARGE_PIC:
4646 ptr->x_ix86_cmodel = CM_LARGE;
4647 break;
4649 default:
4650 break;
4654 /* Print the current options */
4656 static void
4657 ix86_function_specific_print (FILE *file, int indent,
4658 struct cl_target_option *ptr)
4660 char *target_string
4661 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4662 NULL, NULL, ptr->x_ix86_fpmath, false);
4664 gcc_assert (ptr->arch < PROCESSOR_max);
4665 fprintf (file, "%*sarch = %d (%s)\n",
4666 indent, "",
4667 ptr->arch, processor_target_table[ptr->arch].name);
4669 gcc_assert (ptr->tune < PROCESSOR_max);
4670 fprintf (file, "%*stune = %d (%s)\n",
4671 indent, "",
4672 ptr->tune, processor_target_table[ptr->tune].name);
4674 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4676 if (target_string)
4678 fprintf (file, "%*s%s\n", indent, "", target_string);
4679 free (target_string);
4684 /* Inner function to process the attribute((target(...))); it takes an argument
4685 and sets the current options from the argument. If we have a list, recursively go
4686 over the list. */
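/* As a sketch, a hypothetical declaration such as

     __attribute__((target("avx2,no-fma,arch=core-avx2")))
     int foo (int);

   is handled as three comma-separated entries: "avx2" turns an ISA flag on,
   "no-fma" turns one off via the "no-" prefix recognized below, and "arch="
   is recorded as a string option for later processing.  */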
4688 static bool
4689 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4690 struct gcc_options *opts,
4691 struct gcc_options *opts_set,
4692 struct gcc_options *enum_opts_set)
4694 char *next_optstr;
4695 bool ret = true;
4697 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4698 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4699 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4700 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4701 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4703 enum ix86_opt_type
4705 ix86_opt_unknown,
4706 ix86_opt_yes,
4707 ix86_opt_no,
4708 ix86_opt_str,
4709 ix86_opt_enum,
4710 ix86_opt_isa
4713 static const struct
4715 const char *string;
4716 size_t len;
4717 enum ix86_opt_type type;
4718 int opt;
4719 int mask;
4720 } attrs[] = {
4721 /* isa options */
4722 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4723 IX86_ATTR_ISA ("abm", OPT_mabm),
4724 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4725 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4726 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4727 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4728 IX86_ATTR_ISA ("aes", OPT_maes),
4729 IX86_ATTR_ISA ("sha", OPT_msha),
4730 IX86_ATTR_ISA ("avx", OPT_mavx),
4731 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4732 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4733 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4734 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4735 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4736 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4737 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4738 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4739 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4740 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4741 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4742 IX86_ATTR_ISA ("sse", OPT_msse),
4743 IX86_ATTR_ISA ("sse2", OPT_msse2),
4744 IX86_ATTR_ISA ("sse3", OPT_msse3),
4745 IX86_ATTR_ISA ("sse4", OPT_msse4),
4746 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4747 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4748 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4749 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4750 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4751 IX86_ATTR_ISA ("fma", OPT_mfma),
4752 IX86_ATTR_ISA ("xop", OPT_mxop),
4753 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4754 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4755 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4756 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4757 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4758 IX86_ATTR_ISA ("hle", OPT_mhle),
4759 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4760 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4761 IX86_ATTR_ISA ("adx", OPT_madx),
4762 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4763 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4764 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4765 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4766 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4767 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4768 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4769 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4770 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4771 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4772 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4774 /* enum options */
4775 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4777 /* string options */
4778 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4779 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4781 /* flag options */
4782 IX86_ATTR_YES ("cld",
4783 OPT_mcld,
4784 MASK_CLD),
4786 IX86_ATTR_NO ("fancy-math-387",
4787 OPT_mfancy_math_387,
4788 MASK_NO_FANCY_MATH_387),
4790 IX86_ATTR_YES ("ieee-fp",
4791 OPT_mieee_fp,
4792 MASK_IEEE_FP),
4794 IX86_ATTR_YES ("inline-all-stringops",
4795 OPT_minline_all_stringops,
4796 MASK_INLINE_ALL_STRINGOPS),
4798 IX86_ATTR_YES ("inline-stringops-dynamically",
4799 OPT_minline_stringops_dynamically,
4800 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4802 IX86_ATTR_NO ("align-stringops",
4803 OPT_mno_align_stringops,
4804 MASK_NO_ALIGN_STRINGOPS),
4806 IX86_ATTR_YES ("recip",
4807 OPT_mrecip,
4808 MASK_RECIP),
4812 /* If this is a list, recurse to get the options. */
4813 if (TREE_CODE (args) == TREE_LIST)
4815 bool ret = true;
4817 for (; args; args = TREE_CHAIN (args))
4818 if (TREE_VALUE (args)
4819 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4820 p_strings, opts, opts_set,
4821 enum_opts_set))
4822 ret = false;
4824 return ret;
4827 else if (TREE_CODE (args) != STRING_CST)
4829 error ("attribute %<target%> argument not a string");
4830 return false;
4833 /* Handle multiple arguments separated by commas. */
4834 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4836 while (next_optstr && *next_optstr != '\0')
4838 char *p = next_optstr;
4839 char *orig_p = p;
4840 char *comma = strchr (next_optstr, ',');
4841 const char *opt_string;
4842 size_t len, opt_len;
4843 int opt;
4844 bool opt_set_p;
4845 char ch;
4846 unsigned i;
4847 enum ix86_opt_type type = ix86_opt_unknown;
4848 int mask = 0;
4850 if (comma)
4852 *comma = '\0';
4853 len = comma - next_optstr;
4854 next_optstr = comma + 1;
4856 else
4858 len = strlen (p);
4859 next_optstr = NULL;
4862 /* Recognize no-xxx. */
4863 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4865 opt_set_p = false;
4866 p += 3;
4867 len -= 3;
4869 else
4870 opt_set_p = true;
4872 /* Find the option. */
4873 ch = *p;
4874 opt = N_OPTS;
4875 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4877 type = attrs[i].type;
4878 opt_len = attrs[i].len;
4879 if (ch == attrs[i].string[0]
4880 && ((type != ix86_opt_str && type != ix86_opt_enum)
4881 ? len == opt_len
4882 : len > opt_len)
4883 && memcmp (p, attrs[i].string, opt_len) == 0)
4885 opt = attrs[i].opt;
4886 mask = attrs[i].mask;
4887 opt_string = attrs[i].string;
4888 break;
4892 /* Process the option. */
4893 if (opt == N_OPTS)
4895 error ("attribute(target(\"%s\")) is unknown", orig_p);
4896 ret = false;
4899 else if (type == ix86_opt_isa)
4901 struct cl_decoded_option decoded;
4903 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4904 ix86_handle_option (opts, opts_set,
4905 &decoded, input_location);
4908 else if (type == ix86_opt_yes || type == ix86_opt_no)
4910 if (type == ix86_opt_no)
4911 opt_set_p = !opt_set_p;
4913 if (opt_set_p)
4914 opts->x_target_flags |= mask;
4915 else
4916 opts->x_target_flags &= ~mask;
4919 else if (type == ix86_opt_str)
4921 if (p_strings[opt])
4923 error ("option(\"%s\") was already specified", opt_string);
4924 ret = false;
4926 else
4927 p_strings[opt] = xstrdup (p + opt_len);
4930 else if (type == ix86_opt_enum)
4932 bool arg_ok;
4933 int value;
4935 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4936 if (arg_ok)
4937 set_option (opts, enum_opts_set, opt, value,
4938 p + opt_len, DK_UNSPECIFIED, input_location,
4939 global_dc);
4940 else
4942 error ("attribute(target(\"%s\")) is unknown", orig_p);
4943 ret = false;
4947 else
4948 gcc_unreachable ();
4951 return ret;
4954 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4956 tree
4957 ix86_valid_target_attribute_tree (tree args,
4958 struct gcc_options *opts,
4959 struct gcc_options *opts_set)
4961 const char *orig_arch_string = opts->x_ix86_arch_string;
4962 const char *orig_tune_string = opts->x_ix86_tune_string;
4963 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4964 int orig_tune_defaulted = ix86_tune_defaulted;
4965 int orig_arch_specified = ix86_arch_specified;
4966 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4967 tree t = NULL_TREE;
4968 int i;
4969 struct cl_target_option *def
4970 = TREE_TARGET_OPTION (target_option_default_node);
4971 struct gcc_options enum_opts_set;
4973 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4975 /* Process each of the options on the chain. */
4976 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4977 opts_set, &enum_opts_set))
4978 return error_mark_node;
4980 /* If the changed options are different from the default, rerun
4981 ix86_option_override_internal, and then save the options away.
4982 The string options are attribute options, and will be undone
4983 when we copy the save structure. */
4984 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4985 || opts->x_target_flags != def->x_target_flags
4986 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4987 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4988 || enum_opts_set.x_ix86_fpmath)
4990 /* If we are using the default tune= or arch=, undo the string assigned,
4991 and use the default. */
4992 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4993 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4994 else if (!orig_arch_specified)
4995 opts->x_ix86_arch_string = NULL;
4997 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4998 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4999 else if (orig_tune_defaulted)
5000 opts->x_ix86_tune_string = NULL;
5002 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5003 if (enum_opts_set.x_ix86_fpmath)
5004 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5005 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5006 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5008 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5009 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5012 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5013 ix86_option_override_internal (false, opts, opts_set);
5015 /* Add any builtin functions with the new isa if any. */
5016 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5018 /* Save the current options unless we are validating options for
5019 #pragma. */
5020 t = build_target_option_node (opts);
5022 opts->x_ix86_arch_string = orig_arch_string;
5023 opts->x_ix86_tune_string = orig_tune_string;
5024 opts_set->x_ix86_fpmath = orig_fpmath_set;
5026 /* Free up memory allocated to hold the strings */
5027 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5028 free (option_strings[i]);
5031 return t;
5034 /* Hook to validate attribute((target("string"))). */
5036 static bool
5037 ix86_valid_target_attribute_p (tree fndecl,
5038 tree ARG_UNUSED (name),
5039 tree args,
5040 int ARG_UNUSED (flags))
5042 struct gcc_options func_options;
5043 tree new_target, new_optimize;
5044 bool ret = true;
5046 /* attribute((target("default"))) does nothing, beyond
5047 affecting multi-versioning. */
5048 if (TREE_VALUE (args)
5049 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5050 && TREE_CHAIN (args) == NULL_TREE
5051 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5052 return true;
5054 tree old_optimize = build_optimization_node (&global_options);
5056 /* Get the optimization options of the current function. */
5057 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5059 if (!func_optimize)
5060 func_optimize = old_optimize;
5062 /* Init func_options. */
5063 memset (&func_options, 0, sizeof (func_options));
5064 init_options_struct (&func_options, NULL);
5065 lang_hooks.init_options_struct (&func_options);
5067 cl_optimization_restore (&func_options,
5068 TREE_OPTIMIZATION (func_optimize));
5070 /* Initialize func_options to the default before its target options can
5071 be set. */
5072 cl_target_option_restore (&func_options,
5073 TREE_TARGET_OPTION (target_option_default_node));
5075 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5076 &global_options_set);
5078 new_optimize = build_optimization_node (&func_options);
5080 if (new_target == error_mark_node)
5081 ret = false;
5083 else if (fndecl && new_target)
5085 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5087 if (old_optimize != new_optimize)
5088 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5091 return ret;
5095 /* Hook to determine if one function can safely inline another. */
5097 static bool
5098 ix86_can_inline_p (tree caller, tree callee)
5100 bool ret = false;
5101 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5102 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5104 /* If callee has no option attributes, then it is ok to inline. */
5105 if (!callee_tree)
5106 ret = true;
5108 /* If caller has no option attributes, but callee does then it is not ok to
5109 inline. */
5110 else if (!caller_tree)
5111 ret = false;
5113 else
5115 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5116 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5118 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5119 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5120 function. */
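/* E.g. a caller compiled for target("sse4.2") may inline an SSE2 callee,
   since the caller's ISA flags are a superset, while an AVX2 callee cannot
   be inlined into a plain SSE2 caller.  */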
5121 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5122 != callee_opts->x_ix86_isa_flags)
5123 ret = false;
5125 /* See if we have the same non-isa options. */
5126 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5127 ret = false;
5129 /* See if arch, tune, etc. are the same. */
5130 else if (caller_opts->arch != callee_opts->arch)
5131 ret = false;
5133 else if (caller_opts->tune != callee_opts->tune)
5134 ret = false;
5136 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5137 ret = false;
5139 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5140 ret = false;
5142 else
5143 ret = true;
5146 return ret;
5150 /* Remember the last target of ix86_set_current_function. */
5151 static GTY(()) tree ix86_previous_fndecl;
5153 /* Set targets globals to the default (or current #pragma GCC target
5154 if active). Invalidate ix86_previous_fndecl cache. */
5156 void
5157 ix86_reset_previous_fndecl (void)
5159 tree new_tree = target_option_current_node;
5160 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5161 if (TREE_TARGET_GLOBALS (new_tree))
5162 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5163 else if (new_tree == target_option_default_node)
5164 restore_target_globals (&default_target_globals);
5165 else
5166 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5167 ix86_previous_fndecl = NULL_TREE;
5170 /* Establish appropriate back-end context for processing the function
5171 FNDECL. The argument might be NULL to indicate processing at top
5172 level, outside of any function scope. */
5173 static void
5174 ix86_set_current_function (tree fndecl)
5176 /* Only change the context if the function changes. This hook is called
5177 several times in the course of compiling a function, and we don't want to
5178 slow things down too much or call target_reinit when it isn't safe. */
5179 if (fndecl == ix86_previous_fndecl)
5180 return;
5182 tree old_tree;
5183 if (ix86_previous_fndecl == NULL_TREE)
5184 old_tree = target_option_current_node;
5185 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5186 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5187 else
5188 old_tree = target_option_default_node;
5190 if (fndecl == NULL_TREE)
5192 if (old_tree != target_option_current_node)
5193 ix86_reset_previous_fndecl ();
5194 return;
5197 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5198 if (new_tree == NULL_TREE)
5199 new_tree = target_option_default_node;
5201 if (old_tree != new_tree)
5203 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5204 if (TREE_TARGET_GLOBALS (new_tree))
5205 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5206 else if (new_tree == target_option_default_node)
5207 restore_target_globals (&default_target_globals);
5208 else
5209 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5211 ix86_previous_fndecl = fndecl;
5215 /* Return true if this goes in large data/bss. */
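/* E.g. with -mcmodel=medium and the default -mlarge-data-threshold=65536,
   a hypothetical "static char buf[1 << 20];" counts as large data and is
   later placed in .ldata/.lbss, while small scalars stay in .data/.bss.  */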
5217 static bool
5218 ix86_in_large_data_p (tree exp)
5220 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5221 return false;
5223 /* Functions are never large data. */
5224 if (TREE_CODE (exp) == FUNCTION_DECL)
5225 return false;
5227 /* Automatic variables are never large data. */
5228 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5229 return false;
5231 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5233 const char *section = DECL_SECTION_NAME (exp);
5234 if (strcmp (section, ".ldata") == 0
5235 || strcmp (section, ".lbss") == 0)
5236 return true;
5237 return false;
5239 else
5241 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5243 /* If this is an incomplete type with size 0, then we can't put it
5244 in data because it might be too big when completed. Also,
5245 int_size_in_bytes returns -1 if the size can vary or is larger than
5246 an integer, in which case it is also safer to assume that it goes in
5247 large data. */
5248 if (size <= 0 || size > ix86_section_threshold)
5249 return true;
5252 return false;
5255 /* Switch to the appropriate section for output of DECL.
5256 DECL is either a `VAR_DECL' node or a constant of some sort.
5257 RELOC indicates whether forming the initial value of DECL requires
5258 link-time relocations. */
5260 ATTRIBUTE_UNUSED static section *
5261 x86_64_elf_select_section (tree decl, int reloc,
5262 unsigned HOST_WIDE_INT align)
5264 if (ix86_in_large_data_p (decl))
5266 const char *sname = NULL;
5267 unsigned int flags = SECTION_WRITE;
5268 switch (categorize_decl_for_section (decl, reloc))
5270 case SECCAT_DATA:
5271 sname = ".ldata";
5272 break;
5273 case SECCAT_DATA_REL:
5274 sname = ".ldata.rel";
5275 break;
5276 case SECCAT_DATA_REL_LOCAL:
5277 sname = ".ldata.rel.local";
5278 break;
5279 case SECCAT_DATA_REL_RO:
5280 sname = ".ldata.rel.ro";
5281 break;
5282 case SECCAT_DATA_REL_RO_LOCAL:
5283 sname = ".ldata.rel.ro.local";
5284 break;
5285 case SECCAT_BSS:
5286 sname = ".lbss";
5287 flags |= SECTION_BSS;
5288 break;
5289 case SECCAT_RODATA:
5290 case SECCAT_RODATA_MERGE_STR:
5291 case SECCAT_RODATA_MERGE_STR_INIT:
5292 case SECCAT_RODATA_MERGE_CONST:
5293 sname = ".lrodata";
5294 flags = 0;
5295 break;
5296 case SECCAT_SRODATA:
5297 case SECCAT_SDATA:
5298 case SECCAT_SBSS:
5299 gcc_unreachable ();
5300 case SECCAT_TEXT:
5301 case SECCAT_TDATA:
5302 case SECCAT_TBSS:
5303 /* We don't split these for the medium model. Place them into
5304 default sections and hope for the best. */
5305 break;
5307 if (sname)
5309 /* We might get called with string constants, but get_named_section
5310 doesn't like them as they are not DECLs. Also, we need to set
5311 flags in that case. */
5312 if (!DECL_P (decl))
5313 return get_section (sname, flags, NULL);
5314 return get_named_section (decl, sname, reloc);
5317 return default_elf_select_section (decl, reloc, align);
5320 /* Select a set of attributes for section NAME based on the properties
5321 of DECL and whether or not RELOC indicates that DECL's initializer
5322 might contain runtime relocations. */
5324 static unsigned int ATTRIBUTE_UNUSED
5325 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5327 unsigned int flags = default_section_type_flags (decl, name, reloc);
5329 if (decl == NULL_TREE
5330 && (strcmp (name, ".ldata.rel.ro") == 0
5331 || strcmp (name, ".ldata.rel.ro.local") == 0))
5332 flags |= SECTION_RELRO;
5334 if (strcmp (name, ".lbss") == 0
5335 || strncmp (name, ".lbss.", 6) == 0
5336 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5337 flags |= SECTION_BSS;
5339 return flags;
5342 /* Build up a unique section name, expressed as a
5343 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5344 RELOC indicates whether the initial value of EXP requires
5345 link-time relocations. */
5347 static void ATTRIBUTE_UNUSED
5348 x86_64_elf_unique_section (tree decl, int reloc)
5350 if (ix86_in_large_data_p (decl))
5352 const char *prefix = NULL;
5353 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5354 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5356 switch (categorize_decl_for_section (decl, reloc))
5358 case SECCAT_DATA:
5359 case SECCAT_DATA_REL:
5360 case SECCAT_DATA_REL_LOCAL:
5361 case SECCAT_DATA_REL_RO:
5362 case SECCAT_DATA_REL_RO_LOCAL:
5363 prefix = one_only ? ".ld" : ".ldata";
5364 break;
5365 case SECCAT_BSS:
5366 prefix = one_only ? ".lb" : ".lbss";
5367 break;
5368 case SECCAT_RODATA:
5369 case SECCAT_RODATA_MERGE_STR:
5370 case SECCAT_RODATA_MERGE_STR_INIT:
5371 case SECCAT_RODATA_MERGE_CONST:
5372 prefix = one_only ? ".lr" : ".lrodata";
5373 break;
5374 case SECCAT_SRODATA:
5375 case SECCAT_SDATA:
5376 case SECCAT_SBSS:
5377 gcc_unreachable ();
5378 case SECCAT_TEXT:
5379 case SECCAT_TDATA:
5380 case SECCAT_TBSS:
5381 /* We don't split these for the medium model. Place them into
5382 default sections and hope for the best. */
5383 break;
5385 if (prefix)
5387 const char *name, *linkonce;
5388 char *string;
5390 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5391 name = targetm.strip_name_encoding (name);
5393 /* If we're using one_only, then there needs to be a .gnu.linkonce
5394 prefix to the section name. */
5395 linkonce = one_only ? ".gnu.linkonce" : "";
5397 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5399 set_decl_section_name (decl, string);
5400 return;
5403 default_unique_section (decl, reloc);
5406 #ifdef COMMON_ASM_OP
5407 /* This says how to output assembler code to declare an
5408 uninitialized external linkage data object.
5410 For medium model x86-64 we need to use .largecomm opcode for
5411 large objects. */
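/* A sketch of the output for such a large common object (name, size and
   alignment purely illustrative):

       .largecomm	buf,1048576,32

   smaller objects keep the ordinary .comm directive.  */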
5412 void
5413 x86_elf_aligned_common (FILE *file,
5414 const char *name, unsigned HOST_WIDE_INT size,
5415 int align)
5417 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5418 && size > (unsigned int)ix86_section_threshold)
5419 fputs ("\t.largecomm\t", file);
5420 else
5421 fputs (COMMON_ASM_OP, file);
5422 assemble_name (file, name);
5423 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5424 size, align / BITS_PER_UNIT);
5426 #endif
5428 /* Utility function for targets to use in implementing
5429 ASM_OUTPUT_ALIGNED_BSS. */
5431 void
5432 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5433 unsigned HOST_WIDE_INT size, int align)
5435 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5436 && size > (unsigned int)ix86_section_threshold)
5437 switch_to_section (get_named_section (decl, ".lbss", 0));
5438 else
5439 switch_to_section (bss_section);
5440 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5441 #ifdef ASM_DECLARE_OBJECT_NAME
5442 last_assemble_variable_decl = decl;
5443 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5444 #else
5445 /* The standard thing is just to output a label for the object. */
5446 ASM_OUTPUT_LABEL (file, name);
5447 #endif /* ASM_DECLARE_OBJECT_NAME */
5448 ASM_OUTPUT_SKIP (file, size ? size : 1);
5451 /* Decide whether we must probe the stack before any space allocation
5452 on this target. It's essentially TARGET_STACK_PROBE except when
5453 -fstack-check causes the stack to be already probed differently. */
5455 bool
5456 ix86_target_stack_probe (void)
5458 /* Do not probe the stack twice if static stack checking is enabled. */
5459 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5460 return false;
5462 return TARGET_STACK_PROBE;
5465 /* Decide whether we can make a sibling call to a function. DECL is the
5466 declaration of the function being targeted by the call and EXP is the
5467 CALL_EXPR representing the call. */
5469 static bool
5470 ix86_function_ok_for_sibcall (tree decl, tree exp)
5472 tree type, decl_or_type;
5473 rtx a, b;
5475 /* If we are generating position-independent code, we cannot sibcall
5476 optimize any indirect call, or a direct call to a global function,
5477 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5478 if (!TARGET_MACHO
5479 && !TARGET_64BIT
5480 && flag_pic
5481 && (!decl || !targetm.binds_local_p (decl)))
5482 return false;
5484 /* If we need to align the outgoing stack, then sibcalling would
5485 unalign the stack, which may break the called function. */
5486 if (ix86_minimum_incoming_stack_boundary (true)
5487 < PREFERRED_STACK_BOUNDARY)
5488 return false;
5490 if (decl)
5492 decl_or_type = decl;
5493 type = TREE_TYPE (decl);
5495 else
5497 /* We're looking at the CALL_EXPR, we need the type of the function. */
5498 type = CALL_EXPR_FN (exp); /* pointer expression */
5499 type = TREE_TYPE (type); /* pointer type */
5500 type = TREE_TYPE (type); /* function type */
5501 decl_or_type = type;
5504 /* Check that the return value locations are the same. Like
5505 if we are returning floats on the 80387 register stack, we cannot
5506 make a sibcall from a function that doesn't return a float to a
5507 function that does or, conversely, from a function that does return
5508 a float to a function that doesn't; the necessary stack adjustment
5509 would not be executed. This is also the place we notice
5510 differences in the return value ABI. Note that it is ok for one
5511 of the functions to have void return type as long as the return
5512 value of the other is passed in a register. */
5513 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5514 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5515 cfun->decl, false);
5516 if (STACK_REG_P (a) || STACK_REG_P (b))
5518 if (!rtx_equal_p (a, b))
5519 return false;
5521 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5523 else if (!rtx_equal_p (a, b))
5524 return false;
5526 if (TARGET_64BIT)
5528 /* The SYSV ABI has more call-clobbered registers;
5529 disallow sibcalls from MS to SYSV. */
5530 if (cfun->machine->call_abi == MS_ABI
5531 && ix86_function_type_abi (type) == SYSV_ABI)
5532 return false;
5534 else
5536 /* If this call is indirect, we'll need to be able to use a
5537 call-clobbered register for the address of the target function.
5538 Make sure that all such registers are not used for passing
5539 parameters. Note that DLLIMPORT functions are indirect. */
5540 if (!decl
5541 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5543 if (ix86_function_regparm (type, NULL) >= 3)
5545 /* ??? Need to count the actual number of registers to be used,
5546 not the possible number of registers. Fix later. */
5547 return false;
5552 /* Otherwise okay. That also includes certain types of indirect calls. */
5553 return true;
5556 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5557 and "sseregparm" calling convention attributes;
5558 arguments as in struct attribute_spec.handler. */
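/* For instance, a hypothetical "__attribute__((stdcall, fastcall))" pair on
   one function type is diagnosed below as incompatible, while combining
   regparm(N) with cdecl or stdcall is accepted, and sseregparm combines
   with any of them.  */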
5560 static tree
5561 ix86_handle_cconv_attribute (tree *node, tree name,
5562 tree args,
5563 int,
5564 bool *no_add_attrs)
5566 if (TREE_CODE (*node) != FUNCTION_TYPE
5567 && TREE_CODE (*node) != METHOD_TYPE
5568 && TREE_CODE (*node) != FIELD_DECL
5569 && TREE_CODE (*node) != TYPE_DECL)
5571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5572 name);
5573 *no_add_attrs = true;
5574 return NULL_TREE;
5577 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5578 if (is_attribute_p ("regparm", name))
5580 tree cst;
5582 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5584 error ("fastcall and regparm attributes are not compatible");
5587 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5589 error ("regparam and thiscall attributes are not compatible");
5592 cst = TREE_VALUE (args);
5593 if (TREE_CODE (cst) != INTEGER_CST)
5595 warning (OPT_Wattributes,
5596 "%qE attribute requires an integer constant argument",
5597 name);
5598 *no_add_attrs = true;
5600 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5602 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5603 name, REGPARM_MAX);
5604 *no_add_attrs = true;
5607 return NULL_TREE;
5610 if (TARGET_64BIT)
5612 /* Do not warn when emulating the MS ABI. */
5613 if ((TREE_CODE (*node) != FUNCTION_TYPE
5614 && TREE_CODE (*node) != METHOD_TYPE)
5615 || ix86_function_type_abi (*node) != MS_ABI)
5616 warning (OPT_Wattributes, "%qE attribute ignored",
5617 name);
5618 *no_add_attrs = true;
5619 return NULL_TREE;
5622 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5623 if (is_attribute_p ("fastcall", name))
5625 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5627 error ("fastcall and cdecl attributes are not compatible");
5629 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5631 error ("fastcall and stdcall attributes are not compatible");
5633 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5635 error ("fastcall and regparm attributes are not compatible");
5637 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5639 error ("fastcall and thiscall attributes are not compatible");
5643 /* Can combine stdcall with fastcall (redundant), regparm and
5644 sseregparm. */
5645 else if (is_attribute_p ("stdcall", name))
5647 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5649 error ("stdcall and cdecl attributes are not compatible");
5651 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5653 error ("stdcall and fastcall attributes are not compatible");
5655 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5657 error ("stdcall and thiscall attributes are not compatible");
5661 /* Can combine cdecl with regparm and sseregparm. */
5662 else if (is_attribute_p ("cdecl", name))
5664 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5666 error ("stdcall and cdecl attributes are not compatible");
5668 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5670 error ("fastcall and cdecl attributes are not compatible");
5672 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5674 error ("cdecl and thiscall attributes are not compatible");
5677 else if (is_attribute_p ("thiscall", name))
5679 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5680 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5681 name);
5682 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5684 error ("stdcall and thiscall attributes are not compatible");
5686 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5688 error ("fastcall and thiscall attributes are not compatible");
5690 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5692 error ("cdecl and thiscall attributes are not compatible");
5696 /* Can combine sseregparm with all attributes. */
5698 return NULL_TREE;
5701 /* The transactional memory builtins are implicitly regparm or fastcall
5702 depending on the ABI. Override the generic do-nothing attribute that
5703 these builtins were declared with, and replace it with one of the two
5704 attributes that we expect elsewhere. */
5706 static tree
5707 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5708 int flags, bool *no_add_attrs)
5710 tree alt;
5712 /* In no case do we want to add the placeholder attribute. */
5713 *no_add_attrs = true;
5715 /* The 64-bit ABI is unchanged for transactional memory. */
5716 if (TARGET_64BIT)
5717 return NULL_TREE;
5719 /* ??? Is there a better way to validate 32-bit windows? We have
5720 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5721 if (CHECK_STACK_LIMIT > 0)
5722 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5723 else
5725 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5726 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5728 decl_attributes (node, alt, flags);
5730 return NULL_TREE;
5733 /* This function determines the calling convention from TYPE. */
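/* Roughly: for a hypothetical 32-bit declaration

     int __attribute__((fastcall)) f (int, int);

   this returns IX86_CALLCVT_FASTCALL, while an unannotated prototype built
   without -mrtd falls through to IX86_CALLCVT_CDECL near the end.  */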
5735 unsigned int
5736 ix86_get_callcvt (const_tree type)
5738 unsigned int ret = 0;
5739 bool is_stdarg;
5740 tree attrs;
5742 if (TARGET_64BIT)
5743 return IX86_CALLCVT_CDECL;
5745 attrs = TYPE_ATTRIBUTES (type);
5746 if (attrs != NULL_TREE)
5748 if (lookup_attribute ("cdecl", attrs))
5749 ret |= IX86_CALLCVT_CDECL;
5750 else if (lookup_attribute ("stdcall", attrs))
5751 ret |= IX86_CALLCVT_STDCALL;
5752 else if (lookup_attribute ("fastcall", attrs))
5753 ret |= IX86_CALLCVT_FASTCALL;
5754 else if (lookup_attribute ("thiscall", attrs))
5755 ret |= IX86_CALLCVT_THISCALL;
5757 /* Regparm isn't allowed for thiscall and fastcall. */
5758 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5760 if (lookup_attribute ("regparm", attrs))
5761 ret |= IX86_CALLCVT_REGPARM;
5762 if (lookup_attribute ("sseregparm", attrs))
5763 ret |= IX86_CALLCVT_SSEREGPARM;
5766 if (IX86_BASE_CALLCVT(ret) != 0)
5767 return ret;
5770 is_stdarg = stdarg_p (type);
5771 if (TARGET_RTD && !is_stdarg)
5772 return IX86_CALLCVT_STDCALL | ret;
5774 if (ret != 0
5775 || is_stdarg
5776 || TREE_CODE (type) != METHOD_TYPE
5777 || ix86_function_type_abi (type) != MS_ABI)
5778 return IX86_CALLCVT_CDECL | ret;
5780 return IX86_CALLCVT_THISCALL;
5783 /* Return 0 if the attributes for two types are incompatible, 1 if they
5784 are compatible, and 2 if they are nearly compatible (which causes a
5785 warning to be generated). */
5787 static int
5788 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5790 unsigned int ccvt1, ccvt2;
5792 if (TREE_CODE (type1) != FUNCTION_TYPE
5793 && TREE_CODE (type1) != METHOD_TYPE)
5794 return 1;
5796 ccvt1 = ix86_get_callcvt (type1);
5797 ccvt2 = ix86_get_callcvt (type2);
5798 if (ccvt1 != ccvt2)
5799 return 0;
5800 if (ix86_function_regparm (type1, NULL)
5801 != ix86_function_regparm (type2, NULL))
5802 return 0;
5804 return 1;
5807 /* Return the regparm value for a function with the indicated TYPE and DECL.
5808 DECL may be NULL when calling function indirectly
5809 or considering a libcall. */
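/* For example, the hypothetical declaration

     void g (int, int, int) __attribute__((regparm(3)));

   passes its three integer arguments in %eax, %edx and %ecx instead of on
   the stack; fastcall and thiscall map to 2 and 1 register(s) below.  */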
5811 static int
5812 ix86_function_regparm (const_tree type, const_tree decl)
5814 tree attr;
5815 int regparm;
5816 unsigned int ccvt;
5818 if (TARGET_64BIT)
5819 return (ix86_function_type_abi (type) == SYSV_ABI
5820 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5821 ccvt = ix86_get_callcvt (type);
5822 regparm = ix86_regparm;
5824 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5826 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5827 if (attr)
5829 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5830 return regparm;
5833 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5834 return 2;
5835 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5836 return 1;
5838 /* Use register calling convention for local functions when possible. */
5839 if (decl
5840 && TREE_CODE (decl) == FUNCTION_DECL)
5842 cgraph_node *target = cgraph_node::get (decl);
5843 if (target)
5844 target = target->function_symbol ();
5846 /* Caller and callee must agree on the calling convention, so
5847 checking just the optimize flag here would mean that with
5848 __attribute__((optimize (...))) the caller could use the regparm convention
5849 and the callee not, or vice versa. Instead look at whether the callee
5850 is optimized or not. */
5851 if (target && opt_for_fn (target->decl, optimize)
5852 && !(profile_flag && !flag_fentry))
5854 cgraph_local_info *i = &target->local;
5855 if (i && i->local && i->can_change_signature)
5857 int local_regparm, globals = 0, regno;
5859 /* Make sure no regparm register is taken by a
5860 fixed register variable. */
5861 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5862 local_regparm++)
5863 if (fixed_regs[local_regparm])
5864 break;
5866 /* We don't want to use regparm(3) for nested functions as
5867 these use a static chain pointer in the third argument. */
5868 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5869 local_regparm = 2;
5871 /* Save a register for the split stack. */
5872 if (local_regparm == 3 && flag_split_stack)
5873 local_regparm = 2;
5875 /* Each fixed register usage increases register pressure,
5876 so fewer registers should be used for argument passing.
5877 This functionality can be overridden by an explicit
5878 regparm value. */
5879 for (regno = AX_REG; regno <= DI_REG; regno++)
5880 if (fixed_regs[regno])
5881 globals++;
5883 local_regparm
5884 = globals < local_regparm ? local_regparm - globals : 0;
5886 if (local_regparm > regparm)
5887 regparm = local_regparm;
5892 return regparm;
5895 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5896 DFmode (2) arguments in SSE registers for a function with the
5897 indicated TYPE and DECL. DECL may be NULL when calling function
5898 indirectly or considering a libcall. Otherwise return 0. */
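/* Roughly: a 32-bit function carrying the sseregparm attribute and compiled
   with SSE enabled gets both SFmode and DFmode scalars in SSE registers
   (return value 2); for plain local functions the result depends on
   -mfpmath=sse and on whether SSE2 is available (2) or only SSE (1).  */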
5900 static int
5901 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5903 gcc_assert (!TARGET_64BIT);
5905 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5906 by the sseregparm attribute. */
5907 if (TARGET_SSEREGPARM
5908 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5910 if (!TARGET_SSE)
5912 if (warn)
5914 if (decl)
5915 error ("calling %qD with attribute sseregparm without "
5916 "SSE/SSE2 enabled", decl);
5917 else
5918 error ("calling %qT with attribute sseregparm without "
5919 "SSE/SSE2 enabled", type);
5921 return 0;
5924 return 2;
5927 if (!decl)
5928 return 0;
5930 cgraph_node *target = cgraph_node::get (decl);
5931 if (target)
5932 target = target->function_symbol ();
5934 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5935 (and DFmode for SSE2) arguments in SSE registers. */
5936 if (target
5937 /* TARGET_SSE_MATH */
5938 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5939 && opt_for_fn (target->decl, optimize)
5940 && !(profile_flag && !flag_fentry))
5942 cgraph_local_info *i = &target->local;
5943 if (i && i->local && i->can_change_signature)
5945 /* Refuse to produce wrong code when a local function with SSE enabled
5946 is called from an SSE-disabled function.
5947 We may work hard to work out these scenarios but hopefully
5948 it does not matter in practice. */
5949 if (!TARGET_SSE && warn)
5951 error ("calling %qD with SSE caling convention without "
5952 "SSE/SSE2 enabled", decl);
5953 return 0;
5955 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5956 ->x_ix86_isa_flags) ? 2 : 1;
5960 return 0;
5963 /* Return true if EAX is live at the start of the function. Used by
5964 ix86_expand_prologue to determine if we need special help before
5965 calling allocate_stack_worker. */
5967 static bool
5968 ix86_eax_live_at_start_p (void)
5970 /* Cheat. Don't bother working forward from ix86_function_regparm
5971 to the function type to whether an actual argument is located in
5972 eax. Instead just look at cfg info, which is still close enough
5973 to correct at this point. This gives false positives for broken
5974 functions that might use uninitialized data that happens to be
5975 allocated in eax, but who cares? */
5976 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5979 static bool
5980 ix86_keep_aggregate_return_pointer (tree fntype)
5982 tree attr;
5984 if (!TARGET_64BIT)
5986 attr = lookup_attribute ("callee_pop_aggregate_return",
5987 TYPE_ATTRIBUTES (fntype));
5988 if (attr)
5989 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5991 /* For 32-bit MS-ABI the default is to keep aggregate
5992 return pointer. */
5993 if (ix86_function_type_abi (fntype) == MS_ABI)
5994 return true;
5996 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5999 /* Value is the number of bytes of arguments automatically
6000 popped when returning from a subroutine call.
6001 FUNDECL is the declaration node of the function (as a tree),
6002 FUNTYPE is the data type of the function (as a tree),
6003 or for a library call it is an identifier node for the subroutine name.
6004 SIZE is the number of bytes of arguments passed on the stack.
6006 On the 80386, the RTD insn may be used to pop them if the number
6007 of args is fixed, but if the number is variable then the caller
6008 must pop them all. RTD can't be used for library calls now
6009 because the library is compiled with the Unix compiler.
6010 Use of RTD is a selectable option, since it is incompatible with
6011 standard Unix calling sequences. If the option is not selected,
6012 the caller must always pop the args.
6014 The attribute stdcall is equivalent to RTD on a per module basis. */
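/* As an illustration, a 32-bit stdcall function taking two ints returns
   with "ret $8", so this hook reports 8 for it; a cdecl or stdarg function
   reports 0 and leaves the argument pop to the caller.  */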
6016 static int
6017 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6019 unsigned int ccvt;
6021 /* None of the 64-bit ABIs pop arguments. */
6022 if (TARGET_64BIT)
6023 return 0;
6025 ccvt = ix86_get_callcvt (funtype);
6027 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6028 | IX86_CALLCVT_THISCALL)) != 0
6029 && ! stdarg_p (funtype))
6030 return size;
6032 /* Lose any fake structure return argument if it is passed on the stack. */
6033 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6034 && !ix86_keep_aggregate_return_pointer (funtype))
6036 int nregs = ix86_function_regparm (funtype, fundecl);
6037 if (nregs == 0)
6038 return GET_MODE_SIZE (Pmode);
6041 return 0;
6044 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6046 static bool
6047 ix86_legitimate_combined_insn (rtx_insn *insn)
6049 /* Check operand constraints in case hard registers were propagated
6050 into insn pattern. This check prevents combine pass from
6051 generating insn patterns with invalid hard register operands.
6052 These invalid insns can eventually confuse reload to error out
6053 with a spill failure. See also PRs 46829 and 46843. */
6054 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6056 int i;
6058 extract_insn (insn);
6059 preprocess_constraints (insn);
6061 int n_operands = recog_data.n_operands;
6062 int n_alternatives = recog_data.n_alternatives;
6063 for (i = 0; i < n_operands; i++)
6065 rtx op = recog_data.operand[i];
6066 machine_mode mode = GET_MODE (op);
6067 const operand_alternative *op_alt;
6068 int offset = 0;
6069 bool win;
6070 int j;
6072 /* For pre-AVX disallow unaligned loads/stores where the
6073 instructions don't support it. */
6074 if (!TARGET_AVX
6075 && VECTOR_MODE_P (GET_MODE (op))
6076 && misaligned_operand (op, GET_MODE (op)))
6078 int min_align = get_attr_ssememalign (insn);
6079 if (min_align == 0)
6080 return false;
6083 /* A unary operator may be accepted by the predicate, but it
6084 is irrelevant for matching constraints. */
6085 if (UNARY_P (op))
6086 op = XEXP (op, 0);
6088 if (GET_CODE (op) == SUBREG)
6090 if (REG_P (SUBREG_REG (op))
6091 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6092 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6093 GET_MODE (SUBREG_REG (op)),
6094 SUBREG_BYTE (op),
6095 GET_MODE (op));
6096 op = SUBREG_REG (op);
6099 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6100 continue;
6102 op_alt = recog_op_alt;
6104 /* Operand has no constraints, anything is OK. */
6105 win = !n_alternatives;
6107 alternative_mask preferred = get_preferred_alternatives (insn);
6108 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6110 if (!TEST_BIT (preferred, j))
6111 continue;
6112 if (op_alt[i].anything_ok
6113 || (op_alt[i].matches != -1
6114 && operands_match_p
6115 (recog_data.operand[i],
6116 recog_data.operand[op_alt[i].matches]))
6117 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6119 win = true;
6120 break;
6124 if (!win)
6125 return false;
6129 return true;
6132 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
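/* ASan maps memory to shadow roughly as shadow = (addr >> 3) + offset;
   the values below give 0x7fff8000 for generic LP64 targets, 1 << 44 for
   64-bit Mach-O and 1 << 29 for 32-bit, matching libsanitizer's layout.  */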
6134 static unsigned HOST_WIDE_INT
6135 ix86_asan_shadow_offset (void)
6137 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6138 : HOST_WIDE_INT_C (0x7fff8000))
6139 : (HOST_WIDE_INT_1 << 29);
6142 /* Argument support functions. */
6144 /* Return true when register may be used to pass function parameters. */
6145 bool
6146 ix86_function_arg_regno_p (int regno)
6148 int i;
6149 const int *parm_regs;
6151 if (TARGET_MPX && BND_REGNO_P (regno))
6152 return true;
6154 if (!TARGET_64BIT)
6156 if (TARGET_MACHO)
6157 return (regno < REGPARM_MAX
6158 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6159 else
6160 return (regno < REGPARM_MAX
6161 || (TARGET_MMX && MMX_REGNO_P (regno)
6162 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6163 || (TARGET_SSE && SSE_REGNO_P (regno)
6164 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6167 if (TARGET_SSE && SSE_REGNO_P (regno)
6168 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6169 return true;
6171 /* TODO: The function should depend on current function ABI but
6172 builtins.c would need updating then. Therefore we use the
6173 default ABI. */
6175 /* RAX is used as hidden argument to va_arg functions. */
6176 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6177 return true;
6179 if (ix86_abi == MS_ABI)
6180 parm_regs = x86_64_ms_abi_int_parameter_registers;
6181 else
6182 parm_regs = x86_64_int_parameter_registers;
6183 for (i = 0; i < (ix86_abi == MS_ABI
6184 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6185 if (regno == parm_regs[i])
6186 return true;
6187 return false;
6190 /* Return if we do not know how to pass TYPE solely in registers. */
6192 static bool
6193 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6195 if (must_pass_in_stack_var_size_or_pad (mode, type))
6196 return true;
6198 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6199 The layout_type routine is crafty and tries to trick us into passing
6200 currently unsupported vector types on the stack by using TImode. */
6201 return (!TARGET_64BIT && mode == TImode
6202 && type && TREE_CODE (type) != VECTOR_TYPE);
6205 /* Return the size, in bytes, of the area reserved for arguments passed
6206 in registers for the function represented by FNDECL, depending on the
6207 ABI format used. */
6208 int
6209 ix86_reg_parm_stack_space (const_tree fndecl)
6211 enum calling_abi call_abi = SYSV_ABI;
6212 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6213 call_abi = ix86_function_abi (fndecl);
6214 else
6215 call_abi = ix86_function_type_abi (fndecl);
6216 if (TARGET_64BIT && call_abi == MS_ABI)
6217 return 32;
6218 return 0;
6221 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
6222 call abi used. */
6223 enum calling_abi
6224 ix86_function_type_abi (const_tree fntype)
6226 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6228 enum calling_abi abi = ix86_abi;
6229 if (abi == SYSV_ABI)
6231 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6233 if (TARGET_X32)
6235 static bool warned = false;
6236 if (!warned)
6238 error ("X32 does not support ms_abi attribute");
6239 warned = true;
6242 abi = MS_ABI;
6245 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6246 abi = SYSV_ABI;
6247 return abi;
6249 return ix86_abi;
6252 /* We add this as a workaround in order to use libc_has_function
6253 hook in i386.md. */
6254 bool
6255 ix86_libc_has_function (enum function_class fn_class)
6257 return targetm.libc_has_function (fn_class);
6260 static bool
6261 ix86_function_ms_hook_prologue (const_tree fn)
6263 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6265 if (decl_function_context (fn) != NULL_TREE)
6266 error_at (DECL_SOURCE_LOCATION (fn),
6267 "ms_hook_prologue is not compatible with nested function");
6268 else
6269 return true;
6271 return false;
6274 static enum calling_abi
6275 ix86_function_abi (const_tree fndecl)
6277 if (! fndecl)
6278 return ix86_abi;
6279 return ix86_function_type_abi (TREE_TYPE (fndecl));
6282 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6283 call abi used. */
6284 enum calling_abi
6285 ix86_cfun_abi (void)
6287 if (! cfun)
6288 return ix86_abi;
6289 return cfun->machine->call_abi;
6292 /* Write the extra assembler code needed to declare a function properly. */
6294 void
6295 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6296 tree decl)
6298 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6300 if (is_ms_hook)
6302 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6303 unsigned int filler_cc = 0xcccccccc;
6305 for (i = 0; i < filler_count; i += 4)
6306 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6309 #ifdef SUBTARGET_ASM_UNWIND_INIT
6310 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6311 #endif
6313 ASM_OUTPUT_LABEL (asm_out_file, fname);
6315 /* Output magic byte marker, if hot-patch attribute is set. */
6316 if (is_ms_hook)
6318 if (TARGET_64BIT)
6320 /* leaq [%rsp + 0], %rsp */
6321 asm_fprintf (asm_out_file, ASM_BYTE
6322 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6324 else
6326 /* movl.s %edi, %edi
6327 push %ebp
6328 movl.s %esp, %ebp */
6329 asm_fprintf (asm_out_file, ASM_BYTE
6330 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6335 /* regclass.c */
6336 extern void init_regs (void);
6338 /* Implementation of call abi switching target hook. Specific to FNDECL
6339 the specific call register sets are set. See also
6340 ix86_conditional_register_usage for more details. */
6341 void
6342 ix86_call_abi_override (const_tree fndecl)
6344 if (fndecl == NULL_TREE)
6345 cfun->machine->call_abi = ix86_abi;
6346 else
6347 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6350 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6351 expensive re-initialization of init_regs each time we switch function context,
6352 since this is needed only during RTL expansion. */
6353 static void
6354 ix86_maybe_switch_abi (void)
6356 if (TARGET_64BIT &&
6357 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6358 reinit_regs ();
6361 /* Return 1 if pseudo register should be created and used to hold
6362 GOT address for PIC code. */
6363 bool
6364 ix86_use_pseudo_pic_reg (void)
6366 if ((TARGET_64BIT
6367 && (ix86_cmodel == CM_SMALL_PIC
6368 || TARGET_PECOFF))
6369 || !flag_pic)
6370 return false;
6371 return true;
6374 /* Initialize large model PIC register. */
6376 static void
6377 ix86_init_large_pic_reg (unsigned int tmp_regno)
6379 rtx_code_label *label;
6380 rtx tmp_reg;
6382 gcc_assert (Pmode == DImode);
6383 label = gen_label_rtx ();
6384 emit_label (label);
6385 LABEL_PRESERVE_P (label) = 1;
6386 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6387 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6388 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6389 label));
6390 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6391 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6392 pic_offset_table_rtx, tmp_reg));
6395 /* Create and initialize PIC register if required. */
6396 static void
6397 ix86_init_pic_reg (void)
6399 edge entry_edge;
6400 rtx_insn *seq;
6402 if (!ix86_use_pseudo_pic_reg ())
6403 return;
6405 start_sequence ();
6407 if (TARGET_64BIT)
6409 if (ix86_cmodel == CM_LARGE_PIC)
6410 ix86_init_large_pic_reg (R11_REG);
6411 else
6412 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6414 else
6416 /* If there is a future mcount call in the function, it is more profitable
6417 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6418 rtx reg = crtl->profile
6419 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6420 : pic_offset_table_rtx;
6421 rtx_insn *insn = emit_insn (gen_set_got (reg));
6422 RTX_FRAME_RELATED_P (insn) = 1;
6423 if (crtl->profile)
6424 emit_move_insn (pic_offset_table_rtx, reg);
6425 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6428 seq = get_insns ();
6429 end_sequence ();
6431 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6432 insert_insn_on_edge (seq, entry_edge);
6433 commit_one_edge_insertion (entry_edge);
6436 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6437 for a call to a function whose data type is FNTYPE.
6438 For a library call, FNTYPE is 0. */
6440 void
6441 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6442 tree fntype, /* tree ptr for function decl */
6443 rtx libname, /* SYMBOL_REF of library name or 0 */
6444 tree fndecl,
6445 int caller)
6447 struct cgraph_local_info *i = NULL;
6448 struct cgraph_node *target = NULL;
6450 memset (cum, 0, sizeof (*cum));
6452 if (fndecl)
6454 target = cgraph_node::get (fndecl);
6455 if (target)
6457 target = target->function_symbol ();
6458 i = cgraph_node::local_info (target->decl);
6459 cum->call_abi = ix86_function_abi (target->decl);
6461 else
6462 cum->call_abi = ix86_function_abi (fndecl);
6464 else
6465 cum->call_abi = ix86_function_type_abi (fntype);
6467 cum->caller = caller;
6469 /* Set up the number of registers to use for passing arguments. */
6470 cum->nregs = ix86_regparm;
6471 if (TARGET_64BIT)
6473 cum->nregs = (cum->call_abi == SYSV_ABI
6474 ? X86_64_REGPARM_MAX
6475 : X86_64_MS_REGPARM_MAX);
6477 if (TARGET_SSE)
6479 cum->sse_nregs = SSE_REGPARM_MAX;
6480 if (TARGET_64BIT)
6482 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6483 ? X86_64_SSE_REGPARM_MAX
6484 : X86_64_MS_SSE_REGPARM_MAX);
6487 if (TARGET_MMX)
6488 cum->mmx_nregs = MMX_REGPARM_MAX;
6489 cum->warn_avx512f = true;
6490 cum->warn_avx = true;
6491 cum->warn_sse = true;
6492 cum->warn_mmx = true;
6494 /* Because the type might mismatch between caller and callee, we need to
6495 use the actual type of the function for local calls.
6496 FIXME: cgraph_analyze can be told to actually record if the function uses
6497 va_start, so for local functions maybe_vaarg can be made aggressive,
6498 helping K&R code.
6499 FIXME: once the typesystem is fixed, we won't need this code anymore. */
6500 if (i && i->local && i->can_change_signature)
6501 fntype = TREE_TYPE (target->decl);
6502 cum->stdarg = stdarg_p (fntype);
6503 cum->maybe_vaarg = (fntype
6504 ? (!prototype_p (fntype) || stdarg_p (fntype))
6505 : !libname);
6507 cum->bnd_regno = FIRST_BND_REG;
6508 cum->bnds_in_bt = 0;
6509 cum->force_bnd_pass = 0;
6511 if (!TARGET_64BIT)
6513 /* If there are variable arguments, then we won't pass anything
6514 in registers in 32-bit mode. */
6515 if (stdarg_p (fntype))
6517 cum->nregs = 0;
6518 cum->sse_nregs = 0;
6519 cum->mmx_nregs = 0;
6520 cum->warn_avx512f = false;
6521 cum->warn_avx = false;
6522 cum->warn_sse = false;
6523 cum->warn_mmx = false;
6524 return;
6527 /* Use ecx and edx registers if function has fastcall attribute,
6528 else look for regparm information. */
6529 if (fntype)
6531 unsigned int ccvt = ix86_get_callcvt (fntype);
6532 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6534 cum->nregs = 1;
6535 cum->fastcall = 1; /* Same first register as in fastcall. */
6537 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6539 cum->nregs = 2;
6540 cum->fastcall = 1;
6542 else
6543 cum->nregs = ix86_function_regparm (fntype, fndecl);
6546 /* Set up the number of SSE registers used for passing SFmode
6547 and DFmode arguments. Warn for mismatching ABI. */
6548 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
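/* For reference, the register budgets set up above work out to: 64-bit
   SysV passes up to 6 integer arguments (%rdi, %rsi, %rdx, %rcx, %r8,
   %r9) and 8 SSE arguments (%xmm0-%xmm7); the 64-bit MS ABI passes up
   to 4 of each (%rcx, %rdx, %r8, %r9 and %xmm0-%xmm3); 32-bit code
   passes integer arguments in registers only when regparm, fastcall or
   thiscall asks for it.  */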
6552 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6553 But in the case of vector types, it is some vector mode.
6555 When we have only some of our vector isa extensions enabled, then there
6556 are some modes for which vector_mode_supported_p is false. For these
6557 modes, the generic vector support in gcc will choose some non-vector mode
6558 in order to implement the type. By computing the natural mode, we'll
6559 select the proper ABI location for the operand and not depend on whatever
6560 the middle-end decides to do with these vector types.
6562 The middle-end can't deal with vector types > 16 bytes. In this
6563 case, we return the original mode and warn of the ABI change if CUM isn't
6564 NULL.
6566 If IN_RETURN is true, warn of the ABI change if the vector mode isn't
6567 available for the function return value. */
6569 static machine_mode
6570 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6571 bool in_return)
6573 machine_mode mode = TYPE_MODE (type);
6575 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6577 HOST_WIDE_INT size = int_size_in_bytes (type);
6578 if ((size == 8 || size == 16 || size == 32 || size == 64)
6579 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6580 && TYPE_VECTOR_SUBPARTS (type) > 1)
6582 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6584 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6585 mode = MIN_MODE_VECTOR_FLOAT;
6586 else
6587 mode = MIN_MODE_VECTOR_INT;
6589 /* Get the mode which has this inner mode and number of units. */
6590 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6591 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6592 && GET_MODE_INNER (mode) == innermode)
6594 if (size == 64 && !TARGET_AVX512F)
6596 static bool warnedavx512f;
6597 static bool warnedavx512f_ret;
6599 if (cum && cum->warn_avx512f && !warnedavx512f)
6601 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6602 "without AVX512F enabled changes the ABI"))
6603 warnedavx512f = true;
6605 else if (in_return && !warnedavx512f_ret)
6607 if (warning (OPT_Wpsabi, "AVX512F vector return "
6608 "without AVX512F enabled changes the ABI"))
6609 warnedavx512f_ret = true;
6612 return TYPE_MODE (type);
6614 else if (size == 32 && !TARGET_AVX)
6616 static bool warnedavx;
6617 static bool warnedavx_ret;
6619 if (cum && cum->warn_avx && !warnedavx)
6621 if (warning (OPT_Wpsabi, "AVX vector argument "
6622 "without AVX enabled changes the ABI"))
6623 warnedavx = true;
6625 else if (in_return && !warnedavx_ret)
6627 if (warning (OPT_Wpsabi, "AVX vector return "
6628 "without AVX enabled changes the ABI"))
6629 warnedavx_ret = true;
6632 return TYPE_MODE (type);
6634 else if (((size == 8 && TARGET_64BIT) || size == 16)
6635 && !TARGET_SSE)
6637 static bool warnedsse;
6638 static bool warnedsse_ret;
6640 if (cum && cum->warn_sse && !warnedsse)
6642 if (warning (OPT_Wpsabi, "SSE vector argument "
6643 "without SSE enabled changes the ABI"))
6644 warnedsse = true;
6646 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6648 if (warning (OPT_Wpsabi, "SSE vector return "
6649 "without SSE enabled changes the ABI"))
6650 warnedsse_ret = true;
6653 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6655 static bool warnedmmx;
6656 static bool warnedmmx_ret;
6658 if (cum && cum->warn_mmx && !warnedmmx)
6660 if (warning (OPT_Wpsabi, "MMX vector argument "
6661 "without MMX enabled changes the ABI"))
6662 warnedmmx = true;
6664 else if (in_return && !warnedmmx_ret)
6666 if (warning (OPT_Wpsabi, "MMX vector return "
6667 "without MMX enabled changes the ABI"))
6668 warnedmmx_ret = true;
6671 return mode;
6674 gcc_unreachable ();
6678 return mode;
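/* Illustrative example: given

       typedef int v8si __attribute__ ((vector_size (32)));

   TYPE_MODE (v8si) is a vector mode only when AVX is enabled.  Without
   AVX the loop above still finds V8SImode, hits the size == 32 &&
   !TARGET_AVX case, issues the -Wpsabi warning and falls back to
   TYPE_MODE, so the argument keeps the pre-AVX passing convention.  */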
6681 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6682 this may not agree with the mode that the type system has chosen for the
6683 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6684 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6686 static rtx
6687 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6688 unsigned int regno)
6690 rtx tmp;
6692 if (orig_mode != BLKmode)
6693 tmp = gen_rtx_REG (orig_mode, regno);
6694 else
6696 tmp = gen_rtx_REG (mode, regno);
6697 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6698 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6701 return tmp;
6704 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6705 of this code is to classify each 8bytes of incoming argument by the register
6706 class and assign registers accordingly. */
6708 /* Return the union class of CLASS1 and CLASS2.
6709 See the x86-64 PS ABI for details. */
6711 static enum x86_64_reg_class
6712 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6714 /* Rule #1: If both classes are equal, this is the resulting class. */
6715 if (class1 == class2)
6716 return class1;
6718 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6719 the other class. */
6720 if (class1 == X86_64_NO_CLASS)
6721 return class2;
6722 if (class2 == X86_64_NO_CLASS)
6723 return class1;
6725 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6726 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6727 return X86_64_MEMORY_CLASS;
6729 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6730 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6731 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6732 return X86_64_INTEGERSI_CLASS;
6733 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6734 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6735 return X86_64_INTEGER_CLASS;
6737 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6738 MEMORY is used. */
6739 if (class1 == X86_64_X87_CLASS
6740 || class1 == X86_64_X87UP_CLASS
6741 || class1 == X86_64_COMPLEX_X87_CLASS
6742 || class2 == X86_64_X87_CLASS
6743 || class2 == X86_64_X87UP_CLASS
6744 || class2 == X86_64_COMPLEX_X87_CLASS)
6745 return X86_64_MEMORY_CLASS;
6747 /* Rule #6: Otherwise class SSE is used. */
6748 return X86_64_SSE_CLASS;
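/* Worked example (informal): for

       struct S { float f; int i; };

   both fields share one eightbyte; f alone classifies as
   X86_64_SSESF_CLASS and i as X86_64_INTEGER_CLASS, and rule #4 merges
   them to X86_64_INTEGER_CLASS, so S travels in a general-purpose
   register rather than an SSE register.  */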
6751 /* Classify the argument of type TYPE and mode MODE.
6752 CLASSES will be filled by the register class used to pass each word
6753 of the operand. The number of words is returned. In case the parameter
6754 should be passed in memory, 0 is returned. As a special case for zero
6755 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6757 BIT_OFFSET is used internally for handling records and specifies the
6758 offset in bits, taken modulo 512 to avoid overflow cases.
6760 See the x86-64 PS ABI for details.
6763 static int
6764 classify_argument (machine_mode mode, const_tree type,
6765 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6767 HOST_WIDE_INT bytes =
6768 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6769 int words
6770 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6772 /* Variable sized entities are always passed/returned in memory. */
6773 if (bytes < 0)
6774 return 0;
6776 if (mode != VOIDmode
6777 && targetm.calls.must_pass_in_stack (mode, type))
6778 return 0;
6780 if (type && AGGREGATE_TYPE_P (type))
6782 int i;
6783 tree field;
6784 enum x86_64_reg_class subclasses[MAX_CLASSES];
6786 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6787 if (bytes > 64)
6788 return 0;
6790 for (i = 0; i < words; i++)
6791 classes[i] = X86_64_NO_CLASS;
6793 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6794 signal the memory class, so handle it as a special case. */
6795 if (!words)
6797 classes[0] = X86_64_NO_CLASS;
6798 return 1;
6801 /* Classify each field of record and merge classes. */
6802 switch (TREE_CODE (type))
6804 case RECORD_TYPE:
6805 /* And now merge the fields of structure. */
6806 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6808 if (TREE_CODE (field) == FIELD_DECL)
6810 int num;
6812 if (TREE_TYPE (field) == error_mark_node)
6813 continue;
6815 /* Bitfields are always classified as integer. Handle them
6816 early, since later code would consider them to be
6817 misaligned integers. */
6818 if (DECL_BIT_FIELD (field))
6820 for (i = (int_bit_position (field)
6821 + (bit_offset % 64)) / 8 / 8;
6822 i < ((int_bit_position (field) + (bit_offset % 64))
6823 + tree_to_shwi (DECL_SIZE (field))
6824 + 63) / 8 / 8; i++)
6825 classes[i] =
6826 merge_classes (X86_64_INTEGER_CLASS,
6827 classes[i]);
6829 else
6831 int pos;
6833 type = TREE_TYPE (field);
6835 /* Flexible array member is ignored. */
6836 if (TYPE_MODE (type) == BLKmode
6837 && TREE_CODE (type) == ARRAY_TYPE
6838 && TYPE_SIZE (type) == NULL_TREE
6839 && TYPE_DOMAIN (type) != NULL_TREE
6840 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6841 == NULL_TREE))
6843 static bool warned;
6845 if (!warned && warn_psabi)
6847 warned = true;
6848 inform (input_location,
6849 "the ABI of passing struct with"
6850 " a flexible array member has"
6851 " changed in GCC 4.4");
6853 continue;
6855 num = classify_argument (TYPE_MODE (type), type,
6856 subclasses,
6857 (int_bit_position (field)
6858 + bit_offset) % 512);
6859 if (!num)
6860 return 0;
6861 pos = (int_bit_position (field)
6862 + (bit_offset % 64)) / 8 / 8;
6863 for (i = 0; i < num && (i + pos) < words; i++)
6864 classes[i + pos] =
6865 merge_classes (subclasses[i], classes[i + pos]);
6869 break;
6871 case ARRAY_TYPE:
6872 /* Arrays are handled as small records. */
6874 int num;
6875 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6876 TREE_TYPE (type), subclasses, bit_offset);
6877 if (!num)
6878 return 0;
6880 /* The partial classes are now full classes. */
6881 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6882 subclasses[0] = X86_64_SSE_CLASS;
6883 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6884 && !((bit_offset % 64) == 0 && bytes == 4))
6885 subclasses[0] = X86_64_INTEGER_CLASS;
6887 for (i = 0; i < words; i++)
6888 classes[i] = subclasses[i % num];
6890 break;
6892 case UNION_TYPE:
6893 case QUAL_UNION_TYPE:
6894 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6896 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6898 if (TREE_CODE (field) == FIELD_DECL)
6900 int num;
6902 if (TREE_TYPE (field) == error_mark_node)
6903 continue;
6905 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6906 TREE_TYPE (field), subclasses,
6907 bit_offset);
6908 if (!num)
6909 return 0;
6910 for (i = 0; i < num && i < words; i++)
6911 classes[i] = merge_classes (subclasses[i], classes[i]);
6914 break;
6916 default:
6917 gcc_unreachable ();
6920 if (words > 2)
6922 /* When the size is > 16 bytes, everything is passed in
6923 memory unless the first class is X86_64_SSE_CLASS
6924 and all the remaining classes are
6925 X86_64_SSEUP_CLASS. */
6926 if (classes[0] != X86_64_SSE_CLASS)
6927 return 0;
6929 for (i = 1; i < words; i++)
6930 if (classes[i] != X86_64_SSEUP_CLASS)
6931 return 0;
6934 /* Final merger cleanup. */
6935 for (i = 0; i < words; i++)
6937 /* If one class is MEMORY, everything should be passed in
6938 memory. */
6939 if (classes[i] == X86_64_MEMORY_CLASS)
6940 return 0;
6942 /* X86_64_SSEUP_CLASS should always be preceded by
6943 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6944 if (classes[i] == X86_64_SSEUP_CLASS
6945 && classes[i - 1] != X86_64_SSE_CLASS
6946 && classes[i - 1] != X86_64_SSEUP_CLASS)
6948 /* The first one should never be X86_64_SSEUP_CLASS. */
6949 gcc_assert (i != 0);
6950 classes[i] = X86_64_SSE_CLASS;
6953 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6954 everything should be passed in memory. */
6955 if (classes[i] == X86_64_X87UP_CLASS
6956 && (classes[i - 1] != X86_64_X87_CLASS))
6958 static bool warned;
6960 /* The first one should never be X86_64_X87UP_CLASS. */
6961 gcc_assert (i != 0);
6962 if (!warned && warn_psabi)
6964 warned = true;
6965 inform (input_location,
6966 "the ABI of passing union with long double"
6967 " has changed in GCC 4.4");
6969 return 0;
6972 return words;
6975 /* Compute the alignment needed. We align all types to their natural boundaries,
6976 with the exception of XFmode, which is aligned to 64 bits. */
6977 if (mode != VOIDmode && mode != BLKmode)
6979 int mode_alignment = GET_MODE_BITSIZE (mode);
6981 if (mode == XFmode)
6982 mode_alignment = 128;
6983 else if (mode == XCmode)
6984 mode_alignment = 256;
6985 if (COMPLEX_MODE_P (mode))
6986 mode_alignment /= 2;
6987 /* Misaligned fields are always returned in memory. */
6988 if (bit_offset % mode_alignment)
6989 return 0;
6992 /* For V1xx modes, just use the base mode. */
6993 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6994 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6995 mode = GET_MODE_INNER (mode);
6997 /* Classification of atomic types. */
6998 switch (mode)
7000 case SDmode:
7001 case DDmode:
7002 classes[0] = X86_64_SSE_CLASS;
7003 return 1;
7004 case TDmode:
7005 classes[0] = X86_64_SSE_CLASS;
7006 classes[1] = X86_64_SSEUP_CLASS;
7007 return 2;
7008 case DImode:
7009 case SImode:
7010 case HImode:
7011 case QImode:
7012 case CSImode:
7013 case CHImode:
7014 case CQImode:
7016 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7018 /* Analyze last 128 bits only. */
7019 size = (size - 1) & 0x7f;
7021 if (size < 32)
7023 classes[0] = X86_64_INTEGERSI_CLASS;
7024 return 1;
7026 else if (size < 64)
7028 classes[0] = X86_64_INTEGER_CLASS;
7029 return 1;
7031 else if (size < 64+32)
7033 classes[0] = X86_64_INTEGER_CLASS;
7034 classes[1] = X86_64_INTEGERSI_CLASS;
7035 return 2;
7037 else if (size < 64+64)
7039 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7040 return 2;
7042 else
7043 gcc_unreachable ();
7045 case CDImode:
7046 case TImode:
7047 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7048 return 2;
7049 case COImode:
7050 case OImode:
7051 /* OImode shouldn't be used directly. */
7052 gcc_unreachable ();
7053 case CTImode:
7054 return 0;
7055 case SFmode:
7056 if (!(bit_offset % 64))
7057 classes[0] = X86_64_SSESF_CLASS;
7058 else
7059 classes[0] = X86_64_SSE_CLASS;
7060 return 1;
7061 case DFmode:
7062 classes[0] = X86_64_SSEDF_CLASS;
7063 return 1;
7064 case XFmode:
7065 classes[0] = X86_64_X87_CLASS;
7066 classes[1] = X86_64_X87UP_CLASS;
7067 return 2;
7068 case TFmode:
7069 classes[0] = X86_64_SSE_CLASS;
7070 classes[1] = X86_64_SSEUP_CLASS;
7071 return 2;
7072 case SCmode:
7073 classes[0] = X86_64_SSE_CLASS;
7074 if (!(bit_offset % 64))
7075 return 1;
7076 else
7078 static bool warned;
7080 if (!warned && warn_psabi)
7082 warned = true;
7083 inform (input_location,
7084 "the ABI of passing structure with complex float"
7085 " member has changed in GCC 4.4");
7087 classes[1] = X86_64_SSESF_CLASS;
7088 return 2;
7090 case DCmode:
7091 classes[0] = X86_64_SSEDF_CLASS;
7092 classes[1] = X86_64_SSEDF_CLASS;
7093 return 2;
7094 case XCmode:
7095 classes[0] = X86_64_COMPLEX_X87_CLASS;
7096 return 1;
7097 case TCmode:
7098 /* This mode is larger than 16 bytes. */
7099 return 0;
7100 case V8SFmode:
7101 case V8SImode:
7102 case V32QImode:
7103 case V16HImode:
7104 case V4DFmode:
7105 case V4DImode:
7106 classes[0] = X86_64_SSE_CLASS;
7107 classes[1] = X86_64_SSEUP_CLASS;
7108 classes[2] = X86_64_SSEUP_CLASS;
7109 classes[3] = X86_64_SSEUP_CLASS;
7110 return 4;
7111 case V8DFmode:
7112 case V16SFmode:
7113 case V8DImode:
7114 case V16SImode:
7115 case V32HImode:
7116 case V64QImode:
7117 classes[0] = X86_64_SSE_CLASS;
7118 classes[1] = X86_64_SSEUP_CLASS;
7119 classes[2] = X86_64_SSEUP_CLASS;
7120 classes[3] = X86_64_SSEUP_CLASS;
7121 classes[4] = X86_64_SSEUP_CLASS;
7122 classes[5] = X86_64_SSEUP_CLASS;
7123 classes[6] = X86_64_SSEUP_CLASS;
7124 classes[7] = X86_64_SSEUP_CLASS;
7125 return 8;
7126 case V4SFmode:
7127 case V4SImode:
7128 case V16QImode:
7129 case V8HImode:
7130 case V2DFmode:
7131 case V2DImode:
7132 classes[0] = X86_64_SSE_CLASS;
7133 classes[1] = X86_64_SSEUP_CLASS;
7134 return 2;
7135 case V1TImode:
7136 case V1DImode:
7137 case V2SFmode:
7138 case V2SImode:
7139 case V4HImode:
7140 case V8QImode:
7141 classes[0] = X86_64_SSE_CLASS;
7142 return 1;
7143 case BLKmode:
7144 case VOIDmode:
7145 return 0;
7146 default:
7147 gcc_assert (VECTOR_MODE_P (mode));
7149 if (bytes > 16)
7150 return 0;
7152 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7154 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7155 classes[0] = X86_64_INTEGERSI_CLASS;
7156 else
7157 classes[0] = X86_64_INTEGER_CLASS;
7158 classes[1] = X86_64_INTEGER_CLASS;
7159 return 1 + (bytes > 8);
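/* Worked example (informal): for

       struct S { double d; long l; };	(16 bytes, two eightbytes)

   the code above produces classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS, so construct_container below will
   place d in an SSE register and l in a general-purpose register.  */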
7163 /* Examine the argument and set the number of registers required in each
7164 class. Return true iff the parameter should be passed in memory. */
7166 static bool
7167 examine_argument (machine_mode mode, const_tree type, int in_return,
7168 int *int_nregs, int *sse_nregs)
7170 enum x86_64_reg_class regclass[MAX_CLASSES];
7171 int n = classify_argument (mode, type, regclass, 0);
7173 *int_nregs = 0;
7174 *sse_nregs = 0;
7176 if (!n)
7177 return true;
7178 for (n--; n >= 0; n--)
7179 switch (regclass[n])
7181 case X86_64_INTEGER_CLASS:
7182 case X86_64_INTEGERSI_CLASS:
7183 (*int_nregs)++;
7184 break;
7185 case X86_64_SSE_CLASS:
7186 case X86_64_SSESF_CLASS:
7187 case X86_64_SSEDF_CLASS:
7188 (*sse_nregs)++;
7189 break;
7190 case X86_64_NO_CLASS:
7191 case X86_64_SSEUP_CLASS:
7192 break;
7193 case X86_64_X87_CLASS:
7194 case X86_64_X87UP_CLASS:
7195 case X86_64_COMPLEX_X87_CLASS:
7196 if (!in_return)
7197 return true;
7198 break;
7199 case X86_64_MEMORY_CLASS:
7200 gcc_unreachable ();
7203 return false;
7206 /* Construct container for the argument used by GCC interface. See
7207 FUNCTION_ARG for the detailed description. */
7209 static rtx
7210 construct_container (machine_mode mode, machine_mode orig_mode,
7211 const_tree type, int in_return, int nintregs, int nsseregs,
7212 const int *intreg, int sse_regno)
7214 /* The following variables hold the static issued_error state. */
7215 static bool issued_sse_arg_error;
7216 static bool issued_sse_ret_error;
7217 static bool issued_x87_ret_error;
7219 machine_mode tmpmode;
7220 int bytes =
7221 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7222 enum x86_64_reg_class regclass[MAX_CLASSES];
7223 int n;
7224 int i;
7225 int nexps = 0;
7226 int needed_sseregs, needed_intregs;
7227 rtx exp[MAX_CLASSES];
7228 rtx ret;
7230 n = classify_argument (mode, type, regclass, 0);
7231 if (!n)
7232 return NULL;
7233 if (examine_argument (mode, type, in_return, &needed_intregs,
7234 &needed_sseregs))
7235 return NULL;
7236 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7237 return NULL;
7239 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7240 some less clueful developer tries to use floating-point anyway. */
7241 if (needed_sseregs && !TARGET_SSE)
7243 if (in_return)
7245 if (!issued_sse_ret_error)
7247 error ("SSE register return with SSE disabled");
7248 issued_sse_ret_error = true;
7251 else if (!issued_sse_arg_error)
7253 error ("SSE register argument with SSE disabled");
7254 issued_sse_arg_error = true;
7256 return NULL;
7259 /* Likewise, error if the ABI requires us to return values in the
7260 x87 registers and the user specified -mno-80387. */
7261 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7262 for (i = 0; i < n; i++)
7263 if (regclass[i] == X86_64_X87_CLASS
7264 || regclass[i] == X86_64_X87UP_CLASS
7265 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7267 if (!issued_x87_ret_error)
7269 error ("x87 register return with x87 disabled");
7270 issued_x87_ret_error = true;
7272 return NULL;
7275 /* First construct simple cases. Avoid SCmode, since we want to use
7276 single register to pass this type. */
7277 if (n == 1 && mode != SCmode)
7278 switch (regclass[0])
7280 case X86_64_INTEGER_CLASS:
7281 case X86_64_INTEGERSI_CLASS:
7282 return gen_rtx_REG (mode, intreg[0]);
7283 case X86_64_SSE_CLASS:
7284 case X86_64_SSESF_CLASS:
7285 case X86_64_SSEDF_CLASS:
7286 if (mode != BLKmode)
7287 return gen_reg_or_parallel (mode, orig_mode,
7288 SSE_REGNO (sse_regno));
7289 break;
7290 case X86_64_X87_CLASS:
7291 case X86_64_COMPLEX_X87_CLASS:
7292 return gen_rtx_REG (mode, FIRST_STACK_REG);
7293 case X86_64_NO_CLASS:
7294 /* Zero sized array, struct or class. */
7295 return NULL;
7296 default:
7297 gcc_unreachable ();
7299 if (n == 2
7300 && regclass[0] == X86_64_SSE_CLASS
7301 && regclass[1] == X86_64_SSEUP_CLASS
7302 && mode != BLKmode)
7303 return gen_reg_or_parallel (mode, orig_mode,
7304 SSE_REGNO (sse_regno));
7305 if (n == 4
7306 && regclass[0] == X86_64_SSE_CLASS
7307 && regclass[1] == X86_64_SSEUP_CLASS
7308 && regclass[2] == X86_64_SSEUP_CLASS
7309 && regclass[3] == X86_64_SSEUP_CLASS
7310 && mode != BLKmode)
7311 return gen_reg_or_parallel (mode, orig_mode,
7312 SSE_REGNO (sse_regno));
7313 if (n == 8
7314 && regclass[0] == X86_64_SSE_CLASS
7315 && regclass[1] == X86_64_SSEUP_CLASS
7316 && regclass[2] == X86_64_SSEUP_CLASS
7317 && regclass[3] == X86_64_SSEUP_CLASS
7318 && regclass[4] == X86_64_SSEUP_CLASS
7319 && regclass[5] == X86_64_SSEUP_CLASS
7320 && regclass[6] == X86_64_SSEUP_CLASS
7321 && regclass[7] == X86_64_SSEUP_CLASS
7322 && mode != BLKmode)
7323 return gen_reg_or_parallel (mode, orig_mode,
7324 SSE_REGNO (sse_regno));
7325 if (n == 2
7326 && regclass[0] == X86_64_X87_CLASS
7327 && regclass[1] == X86_64_X87UP_CLASS)
7328 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7330 if (n == 2
7331 && regclass[0] == X86_64_INTEGER_CLASS
7332 && regclass[1] == X86_64_INTEGER_CLASS
7333 && (mode == CDImode || mode == TImode)
7334 && intreg[0] + 1 == intreg[1])
7335 return gen_rtx_REG (mode, intreg[0]);
7337 /* Otherwise figure out the entries of the PARALLEL. */
7338 for (i = 0; i < n; i++)
7340 int pos;
7342 switch (regclass[i])
7344 case X86_64_NO_CLASS:
7345 break;
7346 case X86_64_INTEGER_CLASS:
7347 case X86_64_INTEGERSI_CLASS:
7348 /* Merge TImodes on aligned occasions here too. */
7349 if (i * 8 + 8 > bytes)
7350 tmpmode
7351 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7352 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7353 tmpmode = SImode;
7354 else
7355 tmpmode = DImode;
7356 /* We've requested 24 bytes for which we
7357 don't have a mode. Use DImode. */
7358 if (tmpmode == BLKmode)
7359 tmpmode = DImode;
7360 exp [nexps++]
7361 = gen_rtx_EXPR_LIST (VOIDmode,
7362 gen_rtx_REG (tmpmode, *intreg),
7363 GEN_INT (i*8));
7364 intreg++;
7365 break;
7366 case X86_64_SSESF_CLASS:
7367 exp [nexps++]
7368 = gen_rtx_EXPR_LIST (VOIDmode,
7369 gen_rtx_REG (SFmode,
7370 SSE_REGNO (sse_regno)),
7371 GEN_INT (i*8));
7372 sse_regno++;
7373 break;
7374 case X86_64_SSEDF_CLASS:
7375 exp [nexps++]
7376 = gen_rtx_EXPR_LIST (VOIDmode,
7377 gen_rtx_REG (DFmode,
7378 SSE_REGNO (sse_regno)),
7379 GEN_INT (i*8));
7380 sse_regno++;
7381 break;
7382 case X86_64_SSE_CLASS:
7383 pos = i;
7384 switch (n)
7386 case 1:
7387 tmpmode = DImode;
7388 break;
7389 case 2:
7390 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7392 tmpmode = TImode;
7393 i++;
7395 else
7396 tmpmode = DImode;
7397 break;
7398 case 4:
7399 gcc_assert (i == 0
7400 && regclass[1] == X86_64_SSEUP_CLASS
7401 && regclass[2] == X86_64_SSEUP_CLASS
7402 && regclass[3] == X86_64_SSEUP_CLASS);
7403 tmpmode = OImode;
7404 i += 3;
7405 break;
7406 case 8:
7407 gcc_assert (i == 0
7408 && regclass[1] == X86_64_SSEUP_CLASS
7409 && regclass[2] == X86_64_SSEUP_CLASS
7410 && regclass[3] == X86_64_SSEUP_CLASS
7411 && regclass[4] == X86_64_SSEUP_CLASS
7412 && regclass[5] == X86_64_SSEUP_CLASS
7413 && regclass[6] == X86_64_SSEUP_CLASS
7414 && regclass[7] == X86_64_SSEUP_CLASS);
7415 tmpmode = XImode;
7416 i += 7;
7417 break;
7418 default:
7419 gcc_unreachable ();
7421 exp [nexps++]
7422 = gen_rtx_EXPR_LIST (VOIDmode,
7423 gen_rtx_REG (tmpmode,
7424 SSE_REGNO (sse_regno)),
7425 GEN_INT (pos*8));
7426 sse_regno++;
7427 break;
7428 default:
7429 gcc_unreachable ();
7433 /* Empty aligned struct, union or class. */
7434 if (nexps == 0)
7435 return NULL;
7437 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7438 for (i = 0; i < nexps; i++)
7439 XVECEXP (ret, 0, i) = exp [i];
7440 return ret;
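/* For the two-eightbyte struct in the example above (double followed by
   long), the PARALLEL built here looks roughly like

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
		      (expr_list (reg:DI di) (const_int 8))])

   one EXPR_LIST per eightbyte, pairing a hard register with its byte
   offset inside the argument; the register numbers shown assume this is
   the first argument of the call.  */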
7443 /* Update the data in CUM to advance over an argument of mode MODE
7444 and data type TYPE. (TYPE is null for libcalls where that information
7445 may not be available.)
7447 Return the number of integer registers advanced over. */
7449 static int
7450 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7451 const_tree type, HOST_WIDE_INT bytes,
7452 HOST_WIDE_INT words)
7454 int res = 0;
7456 switch (mode)
7458 default:
7459 break;
7461 case BLKmode:
7462 if (bytes < 0)
7463 break;
7464 /* FALLTHRU */
7466 case DImode:
7467 case SImode:
7468 case HImode:
7469 case QImode:
7470 cum->words += words;
7471 cum->nregs -= words;
7472 cum->regno += words;
7473 if (cum->nregs >= 0)
7474 res = words;
7475 if (cum->nregs <= 0)
7477 cum->nregs = 0;
7478 cum->regno = 0;
7480 break;
7482 case OImode:
7483 /* OImode shouldn't be used directly. */
7484 gcc_unreachable ();
7486 case DFmode:
7487 if (cum->float_in_sse < 2)
7488 break;
7489 case SFmode:
7490 if (cum->float_in_sse < 1)
7491 break;
7492 /* FALLTHRU */
7494 case V8SFmode:
7495 case V8SImode:
7496 case V64QImode:
7497 case V32HImode:
7498 case V16SImode:
7499 case V8DImode:
7500 case V16SFmode:
7501 case V8DFmode:
7502 case V32QImode:
7503 case V16HImode:
7504 case V4DFmode:
7505 case V4DImode:
7506 case TImode:
7507 case V16QImode:
7508 case V8HImode:
7509 case V4SImode:
7510 case V2DImode:
7511 case V4SFmode:
7512 case V2DFmode:
7513 if (!type || !AGGREGATE_TYPE_P (type))
7515 cum->sse_words += words;
7516 cum->sse_nregs -= 1;
7517 cum->sse_regno += 1;
7518 if (cum->sse_nregs <= 0)
7520 cum->sse_nregs = 0;
7521 cum->sse_regno = 0;
7524 break;
7526 case V8QImode:
7527 case V4HImode:
7528 case V2SImode:
7529 case V2SFmode:
7530 case V1TImode:
7531 case V1DImode:
7532 if (!type || !AGGREGATE_TYPE_P (type))
7534 cum->mmx_words += words;
7535 cum->mmx_nregs -= 1;
7536 cum->mmx_regno += 1;
7537 if (cum->mmx_nregs <= 0)
7539 cum->mmx_nregs = 0;
7540 cum->mmx_regno = 0;
7543 break;
7546 return res;
7549 static int
7550 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7551 const_tree type, HOST_WIDE_INT words, bool named)
7553 int int_nregs, sse_nregs;
7555 /* Unnamed 512- and 256-bit vector mode parameters are passed on the stack. */
7556 if (!named && (VALID_AVX512F_REG_MODE (mode)
7557 || VALID_AVX256_REG_MODE (mode)))
7558 return 0;
7560 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7561 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7563 cum->nregs -= int_nregs;
7564 cum->sse_nregs -= sse_nregs;
7565 cum->regno += int_nregs;
7566 cum->sse_regno += sse_nregs;
7567 return int_nregs;
7569 else
7571 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7572 cum->words = (cum->words + align - 1) & ~(align - 1);
7573 cum->words += words;
7574 return 0;
7578 static int
7579 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7580 HOST_WIDE_INT words)
7582 /* Otherwise, this should be passed indirectly. */
7583 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7585 cum->words += words;
7586 if (cum->nregs > 0)
7588 cum->nregs -= 1;
7589 cum->regno += 1;
7590 return 1;
7592 return 0;
7595 /* Update the data in CUM to advance over an argument of mode MODE and
7596 data type TYPE. (TYPE is null for libcalls where that information
7597 may not be available.) */
7599 static void
7600 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7601 const_tree type, bool named)
7603 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7604 HOST_WIDE_INT bytes, words;
7605 int nregs;
7607 if (mode == BLKmode)
7608 bytes = int_size_in_bytes (type);
7609 else
7610 bytes = GET_MODE_SIZE (mode);
7611 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7613 if (type)
7614 mode = type_natural_mode (type, NULL, false);
7616 if ((type && POINTER_BOUNDS_TYPE_P (type))
7617 || POINTER_BOUNDS_MODE_P (mode))
7619 /* If we pass bounds in BT then just update the remaining bounds count. */
7620 if (cum->bnds_in_bt)
7622 cum->bnds_in_bt--;
7623 return;
7626 /* Update the remaining number of bounds to force. */
7627 if (cum->force_bnd_pass)
7628 cum->force_bnd_pass--;
7630 cum->bnd_regno++;
7632 return;
7635 /* The first arg not going to Bounds Tables resets this counter. */
7636 cum->bnds_in_bt = 0;
7637 /* For unnamed args we always pass bounds to avoid a bounds mess when
7638 the passed and received types do not match. If bounds do not follow an
7639 unnamed arg, still pretend the required number of bounds were passed. */
7640 if (cum->force_bnd_pass)
7642 cum->bnd_regno += cum->force_bnd_pass;
7643 cum->force_bnd_pass = 0;
7646 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7647 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7648 else if (TARGET_64BIT)
7649 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7650 else
7651 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7653 /* For stdarg we expect bounds to be passed for each value passed
7654 in a register. */
7655 if (cum->stdarg)
7656 cum->force_bnd_pass = nregs;
7657 /* For pointers passed in memory we expect bounds passed in Bounds
7658 Table. */
7659 if (!nregs)
7660 cum->bnds_in_bt = chkp_type_bounds_count (type);
7663 /* Define where to put the arguments to a function.
7664 Value is zero to push the argument on the stack,
7665 or a hard register in which to store the argument.
7667 MODE is the argument's machine mode.
7668 TYPE is the data type of the argument (as a tree).
7669 This is null for libcalls where that information may
7670 not be available.
7671 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7672 the preceding args and about the function being called.
7673 NAMED is nonzero if this argument is a named parameter
7674 (otherwise it is an extra parameter matching an ellipsis). */
7676 static rtx
7677 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7678 machine_mode orig_mode, const_tree type,
7679 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7681 /* Avoid the AL settings for the Unix64 ABI. */
7682 if (mode == VOIDmode)
7683 return constm1_rtx;
7685 switch (mode)
7687 default:
7688 break;
7690 case BLKmode:
7691 if (bytes < 0)
7692 break;
7693 /* FALLTHRU */
7694 case DImode:
7695 case SImode:
7696 case HImode:
7697 case QImode:
7698 if (words <= cum->nregs)
7700 int regno = cum->regno;
7702 /* Fastcall allocates the first two DWORD-sized (SImode) or
7703 smaller arguments to ECX and EDX if the argument isn't an
7704 aggregate type. */
7705 if (cum->fastcall)
7707 if (mode == BLKmode
7708 || mode == DImode
7709 || (type && AGGREGATE_TYPE_P (type)))
7710 break;
7712 /* ECX not EAX is the first allocated register. */
7713 if (regno == AX_REG)
7714 regno = CX_REG;
7716 return gen_rtx_REG (mode, regno);
7718 break;
7720 case DFmode:
7721 if (cum->float_in_sse < 2)
7722 break;
7723 case SFmode:
7724 if (cum->float_in_sse < 1)
7725 break;
7726 /* FALLTHRU */
7727 case TImode:
7728 /* In 32bit, we pass TImode in xmm registers. */
7729 case V16QImode:
7730 case V8HImode:
7731 case V4SImode:
7732 case V2DImode:
7733 case V4SFmode:
7734 case V2DFmode:
7735 if (!type || !AGGREGATE_TYPE_P (type))
7737 if (cum->sse_nregs)
7738 return gen_reg_or_parallel (mode, orig_mode,
7739 cum->sse_regno + FIRST_SSE_REG);
7741 break;
7743 case OImode:
7744 case XImode:
7745 /* OImode and XImode shouldn't be used directly. */
7746 gcc_unreachable ();
7748 case V64QImode:
7749 case V32HImode:
7750 case V16SImode:
7751 case V8DImode:
7752 case V16SFmode:
7753 case V8DFmode:
7754 case V8SFmode:
7755 case V8SImode:
7756 case V32QImode:
7757 case V16HImode:
7758 case V4DFmode:
7759 case V4DImode:
7760 if (!type || !AGGREGATE_TYPE_P (type))
7762 if (cum->sse_nregs)
7763 return gen_reg_or_parallel (mode, orig_mode,
7764 cum->sse_regno + FIRST_SSE_REG);
7766 break;
7768 case V8QImode:
7769 case V4HImode:
7770 case V2SImode:
7771 case V2SFmode:
7772 case V1TImode:
7773 case V1DImode:
7774 if (!type || !AGGREGATE_TYPE_P (type))
7776 if (cum->mmx_nregs)
7777 return gen_reg_or_parallel (mode, orig_mode,
7778 cum->mmx_regno + FIRST_MMX_REG);
7780 break;
7783 return NULL_RTX;
7786 static rtx
7787 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7788 machine_mode orig_mode, const_tree type, bool named)
7790 /* Handle a hidden AL argument containing number of registers
7791 for varargs x86-64 functions. */
7792 if (mode == VOIDmode)
7793 return GEN_INT (cum->maybe_vaarg
7794 ? (cum->sse_nregs < 0
7795 ? X86_64_SSE_REGPARM_MAX
7796 : cum->sse_regno)
7797 : -1);
7799 switch (mode)
7801 default:
7802 break;
7804 case V8SFmode:
7805 case V8SImode:
7806 case V32QImode:
7807 case V16HImode:
7808 case V4DFmode:
7809 case V4DImode:
7810 case V16SFmode:
7811 case V16SImode:
7812 case V64QImode:
7813 case V32HImode:
7814 case V8DFmode:
7815 case V8DImode:
7816 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
7817 if (!named)
7818 return NULL;
7819 break;
7822 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7823 cum->sse_nregs,
7824 &x86_64_int_parameter_registers [cum->regno],
7825 cum->sse_regno);
7828 static rtx
7829 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7830 machine_mode orig_mode, bool named,
7831 HOST_WIDE_INT bytes)
7833 unsigned int regno;
7835 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7836 We use the value -2 to specify that the current function call is MSABI. */
7837 if (mode == VOIDmode)
7838 return GEN_INT (-2);
7840 /* If we've run out of registers, it goes on the stack. */
7841 if (cum->nregs == 0)
7842 return NULL_RTX;
7844 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7846 /* Only floating point modes are passed in anything but integer regs. */
7847 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7849 if (named)
7850 regno = cum->regno + FIRST_SSE_REG;
7851 else
7853 rtx t1, t2;
7855 /* Unnamed floating parameters are passed in both the
7856 SSE and integer registers. */
7857 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7858 t2 = gen_rtx_REG (mode, regno);
7859 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7860 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7861 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7864 /* Handle aggregate types passed in registers. */
7865 if (orig_mode == BLKmode)
7867 if (bytes > 0 && bytes <= 8)
7868 mode = (bytes > 4 ? DImode : SImode);
7869 if (mode == BLKmode)
7870 mode = DImode;
7873 return gen_reg_or_parallel (mode, orig_mode, regno);
7876 /* Return where to put the arguments to a function.
7877 Return zero to push the argument on the stack, or a hard register in
7878 which to store the argument.
7879 MODE is the argument's machine mode. TYPE is the data type of the
7880 argument. It is null for libcalls where that information may not be
7881 available. CUM gives information about the preceding args and about
7882 the function being called. NAMED is nonzero if this argument is a
7883 named parameter (otherwise it is an extra parameter matching an
7884 ellipsis). */
7886 static rtx
7887 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7888 const_tree type, bool named)
7890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7891 machine_mode mode = omode;
7892 HOST_WIDE_INT bytes, words;
7893 rtx arg;
7895 /* All pointer bounds arguments are handled separately here. */
7896 if ((type && POINTER_BOUNDS_TYPE_P (type))
7897 || POINTER_BOUNDS_MODE_P (mode))
7899 /* Return NULL if bounds are forced to go in Bounds Table. */
7900 if (cum->bnds_in_bt)
7901 arg = NULL;
7902 /* Return the next available bound reg if any. */
7903 else if (cum->bnd_regno <= LAST_BND_REG)
7904 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7905 /* Return the next special slot number otherwise. */
7906 else
7907 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7909 return arg;
7912 if (mode == BLKmode)
7913 bytes = int_size_in_bytes (type);
7914 else
7915 bytes = GET_MODE_SIZE (mode);
7916 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7918 /* To simplify the code below, represent vector types with a vector mode
7919 even if MMX/SSE are not active. */
7920 if (type && TREE_CODE (type) == VECTOR_TYPE)
7921 mode = type_natural_mode (type, cum, false);
7923 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7924 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7925 else if (TARGET_64BIT)
7926 arg = function_arg_64 (cum, mode, omode, type, named);
7927 else
7928 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7930 return arg;
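/* End-to-end example (informal): for

       void f (int a, double b, __m128 c);

   the 64-bit SysV path places a in %edi, b in %xmm0 and c in %xmm1,
   while the 64-bit MS ABI path places a in %ecx, b in %xmm1 and passes
   c by reference (see ix86_pass_by_reference below) with the pointer
   in %r8.  */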
7933 /* A C expression that indicates when an argument must be passed by
7934 reference. If nonzero for an argument, a copy of that argument is
7935 made in memory and a pointer to the argument is passed instead of
7936 the argument itself. The pointer is passed in whatever way is
7937 appropriate for passing a pointer to that type. */
7939 static bool
7940 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7941 const_tree type, bool)
7943 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7945 /* Bounds are never passed by reference. */
7946 if ((type && POINTER_BOUNDS_TYPE_P (type))
7947 || POINTER_BOUNDS_MODE_P (mode))
7948 return false;
7950 /* See Windows x64 Software Convention. */
7951 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7953 int msize = (int) GET_MODE_SIZE (mode);
7954 if (type)
7956 /* Arrays are passed by reference. */
7957 if (TREE_CODE (type) == ARRAY_TYPE)
7958 return true;
7960 if (AGGREGATE_TYPE_P (type))
7962 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7963 are passed by reference. */
7964 msize = int_size_in_bytes (type);
7968 /* __m128 is passed by reference. */
7969 switch (msize) {
7970 case 1: case 2: case 4: case 8:
7971 break;
7972 default:
7973 return true;
7976 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7977 return 1;
7979 return 0;
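/* Illustration of the rules above: under the 64-bit MS ABI an array, a
   3-byte struct and a 16-byte __m128 are all passed by reference, while
   aggregates of exactly 1, 2, 4 or 8 bytes are passed by value.  On the
   64-bit SysV side only variable-sized types take this path; everything
   else is handled by the classification code instead.  */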
7982 /* Return true when TYPE should be 128bit aligned for 32bit argument
7983 passing ABI. XXX: This function is obsolete and is only used for
7984 checking psABI compatibility with previous versions of GCC. */
7986 static bool
7987 ix86_compat_aligned_value_p (const_tree type)
7989 machine_mode mode = TYPE_MODE (type);
7990 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7991 || mode == TDmode
7992 || mode == TFmode
7993 || mode == TCmode)
7994 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7995 return true;
7996 if (TYPE_ALIGN (type) < 128)
7997 return false;
7999 if (AGGREGATE_TYPE_P (type))
8001 /* Walk the aggregates recursively. */
8002 switch (TREE_CODE (type))
8004 case RECORD_TYPE:
8005 case UNION_TYPE:
8006 case QUAL_UNION_TYPE:
8008 tree field;
8010 /* Walk all the structure fields. */
8011 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8013 if (TREE_CODE (field) == FIELD_DECL
8014 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8015 return true;
8017 break;
8020 case ARRAY_TYPE:
8021 /* Just for use if some languages pass arrays by value. */
8022 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8023 return true;
8024 break;
8026 default:
8027 gcc_unreachable ();
8030 return false;
8033 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8034 XXX: This function is obsolete and is only used for checking psABI
8035 compatibility with previous versions of GCC. */
8037 static unsigned int
8038 ix86_compat_function_arg_boundary (machine_mode mode,
8039 const_tree type, unsigned int align)
8041 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8042 natural boundaries. */
8043 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8045 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8046 make an exception for SSE modes since these require 128bit
8047 alignment.
8049 The handling here differs from field_alignment. ICC aligns MMX
8050 arguments to 4 byte boundaries, while structure fields are aligned
8051 to 8 byte boundaries. */
8052 if (!type)
8054 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8055 align = PARM_BOUNDARY;
8057 else
8059 if (!ix86_compat_aligned_value_p (type))
8060 align = PARM_BOUNDARY;
8063 if (align > BIGGEST_ALIGNMENT)
8064 align = BIGGEST_ALIGNMENT;
8065 return align;
8068 /* Return true when TYPE should be 128bit aligned for 32bit argument
8069 passing ABI. */
8071 static bool
8072 ix86_contains_aligned_value_p (const_tree type)
8074 machine_mode mode = TYPE_MODE (type);
8076 if (mode == XFmode || mode == XCmode)
8077 return false;
8079 if (TYPE_ALIGN (type) < 128)
8080 return false;
8082 if (AGGREGATE_TYPE_P (type))
8084 /* Walk the aggregates recursively. */
8085 switch (TREE_CODE (type))
8087 case RECORD_TYPE:
8088 case UNION_TYPE:
8089 case QUAL_UNION_TYPE:
8091 tree field;
8093 /* Walk all the structure fields. */
8094 for (field = TYPE_FIELDS (type);
8095 field;
8096 field = DECL_CHAIN (field))
8098 if (TREE_CODE (field) == FIELD_DECL
8099 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8100 return true;
8102 break;
8105 case ARRAY_TYPE:
8106 /* Just for use if some languages pass arrays by value. */
8107 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8108 return true;
8109 break;
8111 default:
8112 gcc_unreachable ();
8115 else
8116 return TYPE_ALIGN (type) >= 128;
8118 return false;
8121 /* Gives the alignment boundary, in bits, of an argument with the
8122 specified mode and type. */
8124 static unsigned int
8125 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8127 unsigned int align;
8128 if (type)
8130 /* Since the main variant type is used for the call, convert TYPE to
8131 its main variant. */
8132 type = TYPE_MAIN_VARIANT (type);
8133 align = TYPE_ALIGN (type);
8135 else
8136 align = GET_MODE_ALIGNMENT (mode);
8137 if (align < PARM_BOUNDARY)
8138 align = PARM_BOUNDARY;
8139 else
8141 static bool warned;
8142 unsigned int saved_align = align;
8144 if (!TARGET_64BIT)
8146 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8147 if (!type)
8149 if (mode == XFmode || mode == XCmode)
8150 align = PARM_BOUNDARY;
8152 else if (!ix86_contains_aligned_value_p (type))
8153 align = PARM_BOUNDARY;
8155 if (align < 128)
8156 align = PARM_BOUNDARY;
8159 if (warn_psabi
8160 && !warned
8161 && align != ix86_compat_function_arg_boundary (mode, type,
8162 saved_align))
8164 warned = true;
8165 inform (input_location,
8166 "The ABI for passing parameters with %d-byte"
8167 " alignment has changed in GCC 4.6",
8168 align / BITS_PER_UNIT);
8172 return align;
8175 /* Return true if N is a possible register number of function value. */
8177 static bool
8178 ix86_function_value_regno_p (const unsigned int regno)
8180 switch (regno)
8182 case AX_REG:
8183 return true;
8184 case DX_REG:
8185 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8186 case DI_REG:
8187 case SI_REG:
8188 return TARGET_64BIT && ix86_abi != MS_ABI;
8190 case FIRST_BND_REG:
8191 return chkp_function_instrumented_p (current_function_decl);
8193 /* Complex values are returned in %st(0)/%st(1) pair. */
8194 case ST0_REG:
8195 case ST1_REG:
8196 /* TODO: The function should depend on the current function ABI, but
8197 builtins.c would need updating then. Therefore we use the
8198 default ABI. */
8199 if (TARGET_64BIT && ix86_abi == MS_ABI)
8200 return false;
8201 return TARGET_FLOAT_RETURNS_IN_80387;
8203 /* Complex values are returned in %xmm0/%xmm1 pair. */
8204 case XMM0_REG:
8205 case XMM1_REG:
8206 return TARGET_SSE;
8208 case MM0_REG:
8209 if (TARGET_MACHO || TARGET_64BIT)
8210 return false;
8211 return TARGET_MMX;
8214 return false;
8217 /* Define how to find the value returned by a function.
8218 VALTYPE is the data type of the value (as a tree).
8219 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8220 otherwise, FUNC is 0. */
8222 static rtx
8223 function_value_32 (machine_mode orig_mode, machine_mode mode,
8224 const_tree fntype, const_tree fn)
8226 unsigned int regno;
8228 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8229 we normally prevent this case when mmx is not available. However
8230 some ABIs may require the result to be returned like DImode. */
8231 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8232 regno = FIRST_MMX_REG;
8234 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8235 we prevent this case when sse is not available. However some ABIs
8236 may require the result to be returned like integer TImode. */
8237 else if (mode == TImode
8238 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8239 regno = FIRST_SSE_REG;
8241 /* 32-byte vector modes in %ymm0. */
8242 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8243 regno = FIRST_SSE_REG;
8245 /* 64-byte vector modes in %zmm0. */
8246 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8247 regno = FIRST_SSE_REG;
8249 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8250 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8251 regno = FIRST_FLOAT_REG;
8252 else
8253 /* Most things go in %eax. */
8254 regno = AX_REG;
8256 /* Override FP return register with %xmm0 for local functions when
8257 SSE math is enabled or for functions with sseregparm attribute. */
8258 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8260 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8261 if ((sse_level >= 1 && mode == SFmode)
8262 || (sse_level == 2 && mode == DFmode))
8263 regno = FIRST_SSE_REG;
8266 /* OImode shouldn't be used directly. */
8267 gcc_assert (mode != OImode);
8269 return gen_rtx_REG (orig_mode, regno);
8272 static rtx
8273 function_value_64 (machine_mode orig_mode, machine_mode mode,
8274 const_tree valtype)
8276 rtx ret;
8278 /* Handle libcalls, which don't provide a type node. */
8279 if (valtype == NULL)
8281 unsigned int regno;
8283 switch (mode)
8285 case SFmode:
8286 case SCmode:
8287 case DFmode:
8288 case DCmode:
8289 case TFmode:
8290 case SDmode:
8291 case DDmode:
8292 case TDmode:
8293 regno = FIRST_SSE_REG;
8294 break;
8295 case XFmode:
8296 case XCmode:
8297 regno = FIRST_FLOAT_REG;
8298 break;
8299 case TCmode:
8300 return NULL;
8301 default:
8302 regno = AX_REG;
8305 return gen_rtx_REG (mode, regno);
8307 else if (POINTER_TYPE_P (valtype))
8309 /* Pointers are always returned in word_mode. */
8310 mode = word_mode;
8313 ret = construct_container (mode, orig_mode, valtype, 1,
8314 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8315 x86_64_int_return_registers, 0);
8317 /* For zero-sized structures, construct_container returns NULL, but we
8318 need to keep the rest of the compiler happy by returning a meaningful value. */
8319 if (!ret)
8320 ret = gen_rtx_REG (orig_mode, AX_REG);
8322 return ret;
8325 static rtx
8326 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8327 const_tree valtype)
8329 unsigned int regno = AX_REG;
8331 if (TARGET_SSE)
8333 switch (GET_MODE_SIZE (mode))
8335 case 16:
8336 if (valtype != NULL_TREE
8337 && !VECTOR_INTEGER_TYPE_P (valtype)
8339 && !INTEGRAL_TYPE_P (valtype)
8340 && !VECTOR_FLOAT_TYPE_P (valtype))
8341 break;
8342 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8343 && !COMPLEX_MODE_P (mode))
8344 regno = FIRST_SSE_REG;
8345 break;
8346 case 8:
8347 case 4:
8348 if (mode == SFmode || mode == DFmode)
8349 regno = FIRST_SSE_REG;
8350 break;
8351 default:
8352 break;
8355 return gen_rtx_REG (orig_mode, regno);
8358 static rtx
8359 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8360 machine_mode orig_mode, machine_mode mode)
8362 const_tree fn, fntype;
8364 fn = NULL_TREE;
8365 if (fntype_or_decl && DECL_P (fntype_or_decl))
8366 fn = fntype_or_decl;
8367 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8369 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8370 || POINTER_BOUNDS_MODE_P (mode))
8371 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8372 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8373 return function_value_ms_64 (orig_mode, mode, valtype);
8374 else if (TARGET_64BIT)
8375 return function_value_64 (orig_mode, mode, valtype);
8376 else
8377 return function_value_32 (orig_mode, mode, fntype, fn);
8380 static rtx
8381 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8383 machine_mode mode, orig_mode;
8385 orig_mode = TYPE_MODE (valtype);
8386 mode = type_natural_mode (valtype, NULL, true);
8387 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
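/* Summary example (informal): with the functions above, 64-bit SysV
   code returns an int or a pointer in %rax, a float or double in %xmm0,
   a long double in %st(0), an __int128 in %rax:%rdx and a _Complex
   double in %xmm0/%xmm1.  Under the 64-bit MS ABI scalar floats also
   come back in %xmm0, while other values that fit in 8 bytes use
   %rax.  */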
8390 /* Return an RTX representing a place where a function returns
8391 or receives pointer bounds, or NULL if no bounds are returned.
8393 VALTYPE is a data type of a value returned by the function.
8395 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8396 or FUNCTION_TYPE of the function.
8398 If OUTGOING is false, return a place in which the caller will
8399 see the return value. Otherwise, return a place where a
8400 function returns a value. */
8402 static rtx
8403 ix86_function_value_bounds (const_tree valtype,
8404 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8405 bool outgoing ATTRIBUTE_UNUSED)
8407 rtx res = NULL_RTX;
8409 if (BOUNDED_TYPE_P (valtype))
8410 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8411 else if (chkp_type_has_pointer (valtype))
8413 bitmap slots;
8414 rtx bounds[2];
8415 bitmap_iterator bi;
8416 unsigned i, bnd_no = 0;
8418 bitmap_obstack_initialize (NULL);
8419 slots = BITMAP_ALLOC (NULL);
8420 chkp_find_bound_slots (valtype, slots);
8422 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8424 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8425 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8426 gcc_assert (bnd_no < 2);
8427 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8430 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8432 BITMAP_FREE (slots);
8433 bitmap_obstack_release (NULL);
8435 else
8436 res = NULL_RTX;
8438 return res;
8441 /* Pointer function arguments and return values are promoted to
8442 word_mode. */
8444 static machine_mode
8445 ix86_promote_function_mode (const_tree type, machine_mode mode,
8446 int *punsignedp, const_tree fntype,
8447 int for_return)
8449 if (type != NULL_TREE && POINTER_TYPE_P (type))
8451 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8452 return word_mode;
8454 return default_promote_function_mode (type, mode, punsignedp, fntype,
8455 for_return);
8458 /* Return true if a structure, union or array with MODE containing FIELD
8459 should be accessed using BLKmode. */
8461 static bool
8462 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8464 /* Union with XFmode must be in BLKmode. */
8465 return (mode == XFmode
8466 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8467 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8471 ix86_libcall_value (machine_mode mode)
8473 return ix86_function_value_1 (NULL, NULL, mode, mode);
8476 /* Return true iff type is returned in memory. */
8478 static bool
8479 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8481 #ifdef SUBTARGET_RETURN_IN_MEMORY
8482 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8483 #else
8484 const machine_mode mode = type_natural_mode (type, NULL, true);
8485 HOST_WIDE_INT size;
8487 if (POINTER_BOUNDS_TYPE_P (type))
8488 return false;
8490 if (TARGET_64BIT)
8492 if (ix86_function_type_abi (fntype) == MS_ABI)
8494 size = int_size_in_bytes (type);
8496 /* __m128 is returned in xmm0. */
8497 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8498 || INTEGRAL_TYPE_P (type)
8499 || VECTOR_FLOAT_TYPE_P (type))
8500 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8501 && !COMPLEX_MODE_P (mode)
8502 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8503 return false;
8505 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8506 return size != 1 && size != 2 && size != 4 && size != 8;
8508 else
8510 int needed_intregs, needed_sseregs;
8512 return examine_argument (mode, type, 1,
8513 &needed_intregs, &needed_sseregs);
8516 else
8518 if (mode == BLKmode)
8519 return true;
8521 size = int_size_in_bytes (type);
8523 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8524 return false;
8526 if (VECTOR_MODE_P (mode) || mode == TImode)
8528 /* User-created vectors small enough to fit in EAX. */
8529 if (size < 8)
8530 return false;
8532 /* Unless the ABI prescribes otherwise,
8533 MMX/3dNow values are returned in MM0 if available. */
8535 if (size == 8)
8536 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8538 /* SSE values are returned in XMM0 if available. */
8539 if (size == 16)
8540 return !TARGET_SSE;
8542 /* AVX values are returned in YMM0 if available. */
8543 if (size == 32)
8544 return !TARGET_AVX;
8546 /* AVX512F values are returned in ZMM0 if available. */
8547 if (size == 64)
8548 return !TARGET_AVX512F;
8551 if (mode == XFmode)
8552 return false;
8554 if (size > 12)
8555 return true;
8557 /* OImode shouldn't be used directly. */
8558 gcc_assert (mode != OImode);
8560 return false;
8562 #endif
8566 /* Create the va_list data type. */
8568 /* Returns the calling convention specific va_list data type.
8569 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8571 static tree
8572 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8574 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8576 /* For i386 we use plain pointer to argument area. */
8577 if (!TARGET_64BIT || abi == MS_ABI)
8578 return build_pointer_type (char_type_node);
8580 record = lang_hooks.types.make_type (RECORD_TYPE);
8581 type_decl = build_decl (BUILTINS_LOCATION,
8582 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8584 f_gpr = build_decl (BUILTINS_LOCATION,
8585 FIELD_DECL, get_identifier ("gp_offset"),
8586 unsigned_type_node);
8587 f_fpr = build_decl (BUILTINS_LOCATION,
8588 FIELD_DECL, get_identifier ("fp_offset"),
8589 unsigned_type_node);
8590 f_ovf = build_decl (BUILTINS_LOCATION,
8591 FIELD_DECL, get_identifier ("overflow_arg_area"),
8592 ptr_type_node);
8593 f_sav = build_decl (BUILTINS_LOCATION,
8594 FIELD_DECL, get_identifier ("reg_save_area"),
8595 ptr_type_node);
8597 va_list_gpr_counter_field = f_gpr;
8598 va_list_fpr_counter_field = f_fpr;
8600 DECL_FIELD_CONTEXT (f_gpr) = record;
8601 DECL_FIELD_CONTEXT (f_fpr) = record;
8602 DECL_FIELD_CONTEXT (f_ovf) = record;
8603 DECL_FIELD_CONTEXT (f_sav) = record;
8605 TYPE_STUB_DECL (record) = type_decl;
8606 TYPE_NAME (record) = type_decl;
8607 TYPE_FIELDS (record) = f_gpr;
8608 DECL_CHAIN (f_gpr) = f_fpr;
8609 DECL_CHAIN (f_fpr) = f_ovf;
8610 DECL_CHAIN (f_ovf) = f_sav;
8612 layout_type (record);
8614 /* The correct type is an array type of one element. */
8615 return build_array_type (record, build_index_type (size_zero_node));
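/* For orientation, the record built above corresponds roughly to the
   va_list type documented by the x86-64 psABI:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   gp_offset and fp_offset track how much of the register save area has
   been consumed, while overflow_arg_area points at the stack-passed
   arguments.  */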
8618 /* Set up the builtin va_list data type and, for 64-bit, the additional
8619 calling convention specific va_list data types. */
8621 static tree
8622 ix86_build_builtin_va_list (void)
8624 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8626 /* Initialize abi specific va_list builtin types. */
8627 if (TARGET_64BIT)
8629 tree t;
8630 if (ix86_abi == MS_ABI)
8632 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8633 if (TREE_CODE (t) != RECORD_TYPE)
8634 t = build_variant_type_copy (t);
8635 sysv_va_list_type_node = t;
8637 else
8639 t = ret;
8640 if (TREE_CODE (t) != RECORD_TYPE)
8641 t = build_variant_type_copy (t);
8642 sysv_va_list_type_node = t;
8644 if (ix86_abi != MS_ABI)
8646 t = ix86_build_builtin_va_list_abi (MS_ABI);
8647 if (TREE_CODE (t) != RECORD_TYPE)
8648 t = build_variant_type_copy (t);
8649 ms_va_list_type_node = t;
8651 else
8653 t = ret;
8654 if (TREE_CODE (t) != RECORD_TYPE)
8655 t = build_variant_type_copy (t);
8656 ms_va_list_type_node = t;
8660 return ret;
8663 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8665 static void
8666 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8668 rtx save_area, mem;
8669 alias_set_type set;
8670 int i, max;
8672 /* GPR size of varargs save area. */
8673 if (cfun->va_list_gpr_size)
8674 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8675 else
8676 ix86_varargs_gpr_size = 0;
8678 /* FPR size of varargs save area. We don't need it if we don't pass
8679 anything in SSE registers. */
8680 if (TARGET_SSE && cfun->va_list_fpr_size)
8681 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8682 else
8683 ix86_varargs_fpr_size = 0;
8685 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8686 return;
8688 save_area = frame_pointer_rtx;
8689 set = get_varargs_alias_set ();
8691 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8692 if (max > X86_64_REGPARM_MAX)
8693 max = X86_64_REGPARM_MAX;
8695 for (i = cum->regno; i < max; i++)
8697 mem = gen_rtx_MEM (word_mode,
8698 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8699 MEM_NOTRAP_P (mem) = 1;
8700 set_mem_alias_set (mem, set);
8701 emit_move_insn (mem,
8702 gen_rtx_REG (word_mode,
8703 x86_64_int_parameter_registers[i]));
8706 if (ix86_varargs_fpr_size)
8708 machine_mode smode;
8709 rtx_code_label *label;
8710 rtx test;
8712 /* Now emit code to save SSE registers. The AX parameter contains number
8713 of SSE parameter registers used to call this function, though all we
8714 actually check here is the zero/non-zero status. */
8716 label = gen_label_rtx ();
8717 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8718 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8719 label));
8721 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8722 we used movdqa (i.e. TImode) instead? Perhaps even better would
8723 be if we could determine the real mode of the data, via a hook
8724 into pass_stdarg. Ignore all that for now. */
8725 smode = V4SFmode;
8726 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8727 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8729 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8730 if (max > X86_64_SSE_REGPARM_MAX)
8731 max = X86_64_SSE_REGPARM_MAX;
8733 for (i = cum->sse_regno; i < max; ++i)
8735 mem = plus_constant (Pmode, save_area,
8736 i * 16 + ix86_varargs_gpr_size);
8737 mem = gen_rtx_MEM (smode, mem);
8738 MEM_NOTRAP_P (mem) = 1;
8739 set_mem_alias_set (mem, set);
8740 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8742 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8745 emit_label (label);
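/* A rough sketch of the varargs save area that the code above lays out,
   assuming both the GPR and the SSE parts are needed:

     save_area +   0:  rdi rsi rdx rcx r8 r9     (up to 6 * 8 bytes)
     save_area +  48:  xmm0 ... xmm7             (up to 8 * 16 bytes)

   The gp_offset and fp_offset fields of the va_list index into this
   block, and the SSE part is skipped at run time when AL is zero.  */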
8749 static void
8750 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8752 alias_set_type set = get_varargs_alias_set ();
8753 int i;
8755 /* Reset to zero, as there might be a sysv va_arg used
8756 before. */
8757 ix86_varargs_gpr_size = 0;
8758 ix86_varargs_fpr_size = 0;
8760 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8762 rtx reg, mem;
8764 mem = gen_rtx_MEM (Pmode,
8765 plus_constant (Pmode, virtual_incoming_args_rtx,
8766 i * UNITS_PER_WORD));
8767 MEM_NOTRAP_P (mem) = 1;
8768 set_mem_alias_set (mem, set);
8770 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8771 emit_move_insn (mem, reg);
8775 static void
8776 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8777 tree type, int *, int no_rtl)
8779 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8780 CUMULATIVE_ARGS next_cum;
8781 tree fntype;
8783 /* This argument doesn't appear to be used anymore. Which is good,
8784 because the old code here didn't suppress rtl generation. */
8785 gcc_assert (!no_rtl);
8787 if (!TARGET_64BIT)
8788 return;
8790 fntype = TREE_TYPE (current_function_decl);
8792 /* For varargs, we do not want to skip the dummy va_dcl argument.
8793 For stdargs, we do want to skip the last named argument. */
8794 next_cum = *cum;
8795 if (stdarg_p (fntype))
8796 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8797 true);
8799 if (cum->call_abi == MS_ABI)
8800 setup_incoming_varargs_ms_64 (&next_cum);
8801 else
8802 setup_incoming_varargs_64 (&next_cum);
8805 static void
8806 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8807 enum machine_mode mode,
8808 tree type,
8809 int *pretend_size ATTRIBUTE_UNUSED,
8810 int no_rtl)
8812 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8813 CUMULATIVE_ARGS next_cum;
8814 tree fntype;
8815 rtx save_area;
8816 int bnd_reg, i, max;
8818 gcc_assert (!no_rtl);
8820 /* Do nothing if we use plain pointer to argument area. */
8821 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8822 return;
8824 fntype = TREE_TYPE (current_function_decl);
8826 /* For varargs, we do not want to skip the dummy va_dcl argument.
8827 For stdargs, we do want to skip the last named argument. */
8828 next_cum = *cum;
8829 if (stdarg_p (fntype))
8830 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8831 true);
8832 save_area = frame_pointer_rtx;
8834 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8835 if (max > X86_64_REGPARM_MAX)
8836 max = X86_64_REGPARM_MAX;
8838 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8839 if (chkp_function_instrumented_p (current_function_decl))
8840 for (i = cum->regno; i < max; i++)
8842 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8843 rtx reg = gen_rtx_REG (DImode,
8844 x86_64_int_parameter_registers[i]);
8845 rtx ptr = reg;
8846 rtx bounds;
8848 if (bnd_reg <= LAST_BND_REG)
8849 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8850 else
8852 rtx ldx_addr =
8853 plus_constant (Pmode, arg_pointer_rtx,
8854 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8855 bounds = gen_reg_rtx (BNDmode);
8856 emit_insn (BNDmode == BND64mode
8857 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8858 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8861 emit_insn (BNDmode == BND64mode
8862 ? gen_bnd64_stx (addr, ptr, bounds)
8863 : gen_bnd32_stx (addr, ptr, bounds));
8865 bnd_reg++;
8870 /* Check whether TYPE is a va_list type that is a plain char pointer. */
8872 static bool
8873 is_va_list_char_pointer (tree type)
8875 tree canonic;
8877 /* For 32-bit it is always true. */
8878 if (!TARGET_64BIT)
8879 return true;
8880 canonic = ix86_canonical_va_list_type (type);
8881 return (canonic == ms_va_list_type_node
8882 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8885 /* Implement va_start. */
8887 static void
8888 ix86_va_start (tree valist, rtx nextarg)
8890 HOST_WIDE_INT words, n_gpr, n_fpr;
8891 tree f_gpr, f_fpr, f_ovf, f_sav;
8892 tree gpr, fpr, ovf, sav, t;
8893 tree type;
8894 rtx ovf_rtx;
8896 if (flag_split_stack
8897 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8899 unsigned int scratch_regno;
8901 /* When we are splitting the stack, we can't refer to the stack
8902 arguments using internal_arg_pointer, because they may be on
8903 the old stack. The split stack prologue will arrange to
8904 leave a pointer to the old stack arguments in a scratch
8905 register, which we here copy to a pseudo-register. The split
8906 stack prologue can't set the pseudo-register directly because
8907 it (the prologue) runs before any registers have been saved. */
8909 scratch_regno = split_stack_prologue_scratch_regno ();
8910 if (scratch_regno != INVALID_REGNUM)
8912 rtx reg;
8913 rtx_insn *seq;
8915 reg = gen_reg_rtx (Pmode);
8916 cfun->machine->split_stack_varargs_pointer = reg;
8918 start_sequence ();
8919 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8920 seq = get_insns ();
8921 end_sequence ();
8923 push_topmost_sequence ();
8924 emit_insn_after (seq, entry_of_function ());
8925 pop_topmost_sequence ();
8929 /* Only the 64-bit target needs something special. */
8930 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8932 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8933 std_expand_builtin_va_start (valist, nextarg);
8934 else
8936 rtx va_r, next;
8938 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8939 next = expand_binop (ptr_mode, add_optab,
8940 cfun->machine->split_stack_varargs_pointer,
8941 crtl->args.arg_offset_rtx,
8942 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8943 convert_move (va_r, next, 0);
8945 /* Store zero bounds for va_list. */
8946 if (chkp_function_instrumented_p (current_function_decl))
8947 chkp_expand_bounds_reset_for_mem (valist,
8948 make_tree (TREE_TYPE (valist),
8949 next));
8952 return;
8955 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8956 f_fpr = DECL_CHAIN (f_gpr);
8957 f_ovf = DECL_CHAIN (f_fpr);
8958 f_sav = DECL_CHAIN (f_ovf);
8960 valist = build_simple_mem_ref (valist);
8961 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8962 /* The following should be folded into the MEM_REF offset. */
8963 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8964 f_gpr, NULL_TREE);
8965 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8966 f_fpr, NULL_TREE);
8967 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8968 f_ovf, NULL_TREE);
8969 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8970 f_sav, NULL_TREE);
8972 /* Count number of gp and fp argument registers used. */
8973 words = crtl->args.info.words;
8974 n_gpr = crtl->args.info.regno;
8975 n_fpr = crtl->args.info.sse_regno;
8977 if (cfun->va_list_gpr_size)
8979 type = TREE_TYPE (gpr);
8980 t = build2 (MODIFY_EXPR, type,
8981 gpr, build_int_cst (type, n_gpr * 8));
8982 TREE_SIDE_EFFECTS (t) = 1;
8983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8986 if (TARGET_SSE && cfun->va_list_fpr_size)
8988 type = TREE_TYPE (fpr);
8989 t = build2 (MODIFY_EXPR, type, fpr,
8990 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8991 TREE_SIDE_EFFECTS (t) = 1;
8992 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8995 /* Find the overflow area. */
8996 type = TREE_TYPE (ovf);
8997 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8998 ovf_rtx = crtl->args.internal_arg_pointer;
8999 else
9000 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9001 t = make_tree (type, ovf_rtx);
9002 if (words != 0)
9003 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9005 /* Store zero bounds for overflow area pointer. */
9006 if (chkp_function_instrumented_p (current_function_decl))
9007 chkp_expand_bounds_reset_for_mem (ovf, t);
9009 t = build2 (MODIFY_EXPR, type, ovf, t);
9010 TREE_SIDE_EFFECTS (t) = 1;
9011 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9013 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9015 /* Find the register save area.
9016 The function prologue saves it right above the stack frame. */
9017 type = TREE_TYPE (sav);
9018 t = make_tree (type, frame_pointer_rtx);
9019 if (!ix86_varargs_gpr_size)
9020 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9022 /* Store zero bounds for save area pointer. */
9023 if (chkp_function_instrumented_p (current_function_decl))
9024 chkp_expand_bounds_reset_for_mem (sav, t);
9026 t = build2 (MODIFY_EXPR, type, sav, t);
9027 TREE_SIDE_EFFECTS (t) = 1;
9028 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
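/* A small worked example of the initialization above (a sketch, assuming
   the usual SysV register counts): for

     void f (int a, double b, ...);

   one GPR and one SSE register are consumed by the named arguments, so
   va_start leaves gp_offset = 1 * 8 = 8 and fp_offset = 48 + 1 * 16 = 64,
   while overflow_arg_area points just past any named stack arguments.  */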
9032 /* Implement va_arg. */
9034 static tree
9035 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9036 gimple_seq *post_p)
9038 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9039 tree f_gpr, f_fpr, f_ovf, f_sav;
9040 tree gpr, fpr, ovf, sav, t;
9041 int size, rsize;
9042 tree lab_false, lab_over = NULL_TREE;
9043 tree addr, t2;
9044 rtx container;
9045 int indirect_p = 0;
9046 tree ptrtype;
9047 machine_mode nat_mode;
9048 unsigned int arg_boundary;
9050 /* Only the 64-bit target needs something special. */
9051 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9052 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9054 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9055 f_fpr = DECL_CHAIN (f_gpr);
9056 f_ovf = DECL_CHAIN (f_fpr);
9057 f_sav = DECL_CHAIN (f_ovf);
9059 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9060 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9061 valist = build_va_arg_indirect_ref (valist);
9062 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9063 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9064 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9066 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9067 if (indirect_p)
9068 type = build_pointer_type (type);
9069 size = int_size_in_bytes (type);
9070 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9072 nat_mode = type_natural_mode (type, NULL, false);
9073 switch (nat_mode)
9075 case V8SFmode:
9076 case V8SImode:
9077 case V32QImode:
9078 case V16HImode:
9079 case V4DFmode:
9080 case V4DImode:
9081 case V16SFmode:
9082 case V16SImode:
9083 case V64QImode:
9084 case V32HImode:
9085 case V8DFmode:
9086 case V8DImode:
9087 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9088 if (!TARGET_64BIT_MS_ABI)
9090 container = NULL;
9091 break;
9094 default:
9095 container = construct_container (nat_mode, TYPE_MODE (type),
9096 type, 0, X86_64_REGPARM_MAX,
9097 X86_64_SSE_REGPARM_MAX, intreg,
9099 break;
9102 /* Pull the value out of the saved registers. */
9104 addr = create_tmp_var (ptr_type_node, "addr");
9106 if (container)
9108 int needed_intregs, needed_sseregs;
9109 bool need_temp;
9110 tree int_addr, sse_addr;
9112 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9113 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9115 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9117 need_temp = (!REG_P (container)
9118 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9119 || TYPE_ALIGN (type) > 128));
9121 /* If we are passing a structure, verify that it is a consecutive block
9122 in the register save area. If not, we need to do moves. */
9123 if (!need_temp && !REG_P (container))
9125 /* Verify that all registers are strictly consecutive. */
9126 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9128 int i;
9130 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9132 rtx slot = XVECEXP (container, 0, i);
9133 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9134 || INTVAL (XEXP (slot, 1)) != i * 16)
9135 need_temp = true;
9138 else
9140 int i;
9142 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9144 rtx slot = XVECEXP (container, 0, i);
9145 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9146 || INTVAL (XEXP (slot, 1)) != i * 8)
9147 need_temp = true;
9151 if (!need_temp)
9153 int_addr = addr;
9154 sse_addr = addr;
9156 else
9158 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9159 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9162 /* First ensure that we fit completely in registers. */
9163 if (needed_intregs)
9165 t = build_int_cst (TREE_TYPE (gpr),
9166 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9167 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9168 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9169 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9170 gimplify_and_add (t, pre_p);
9172 if (needed_sseregs)
9174 t = build_int_cst (TREE_TYPE (fpr),
9175 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9176 + X86_64_REGPARM_MAX * 8);
9177 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9178 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9179 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9180 gimplify_and_add (t, pre_p);
9183 /* Compute index to start of area used for integer regs. */
9184 if (needed_intregs)
9186 /* int_addr = gpr + sav; */
9187 t = fold_build_pointer_plus (sav, gpr);
9188 gimplify_assign (int_addr, t, pre_p);
9190 if (needed_sseregs)
9192 /* sse_addr = fpr + sav; */
9193 t = fold_build_pointer_plus (sav, fpr);
9194 gimplify_assign (sse_addr, t, pre_p);
9196 if (need_temp)
9198 int i, prev_size = 0;
9199 tree temp = create_tmp_var (type, "va_arg_tmp");
9201 /* addr = &temp; */
9202 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9203 gimplify_assign (addr, t, pre_p);
9205 for (i = 0; i < XVECLEN (container, 0); i++)
9207 rtx slot = XVECEXP (container, 0, i);
9208 rtx reg = XEXP (slot, 0);
9209 machine_mode mode = GET_MODE (reg);
9210 tree piece_type;
9211 tree addr_type;
9212 tree daddr_type;
9213 tree src_addr, src;
9214 int src_offset;
9215 tree dest_addr, dest;
9216 int cur_size = GET_MODE_SIZE (mode);
9218 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9219 prev_size = INTVAL (XEXP (slot, 1));
9220 if (prev_size + cur_size > size)
9222 cur_size = size - prev_size;
9223 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9224 if (mode == BLKmode)
9225 mode = QImode;
9227 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9228 if (mode == GET_MODE (reg))
9229 addr_type = build_pointer_type (piece_type);
9230 else
9231 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9232 true);
9233 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9234 true);
9236 if (SSE_REGNO_P (REGNO (reg)))
9238 src_addr = sse_addr;
9239 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9241 else
9243 src_addr = int_addr;
9244 src_offset = REGNO (reg) * 8;
9246 src_addr = fold_convert (addr_type, src_addr);
9247 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9249 dest_addr = fold_convert (daddr_type, addr);
9250 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9251 if (cur_size == GET_MODE_SIZE (mode))
9253 src = build_va_arg_indirect_ref (src_addr);
9254 dest = build_va_arg_indirect_ref (dest_addr);
9256 gimplify_assign (dest, src, pre_p);
9258 else
9260 tree copy
9261 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9262 3, dest_addr, src_addr,
9263 size_int (cur_size));
9264 gimplify_and_add (copy, pre_p);
9266 prev_size += cur_size;
9270 if (needed_intregs)
9272 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9273 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9274 gimplify_assign (gpr, t, pre_p);
9277 if (needed_sseregs)
9279 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9280 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9281 gimplify_assign (fpr, t, pre_p);
9284 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9286 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9289 /* ... otherwise out of the overflow area. */
9291 /* When we align a parameter on the stack for the caller, if its
9292 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9293 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee
9294 here with the caller. */
9295 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9296 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9297 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9299 /* Care for on-stack alignment if needed. */
9300 if (arg_boundary <= 64 || size == 0)
9301 t = ovf;
9302 else
9304 HOST_WIDE_INT align = arg_boundary / 8;
9305 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9306 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9307 build_int_cst (TREE_TYPE (t), -align));
9310 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9311 gimplify_assign (addr, t, pre_p);
9313 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9314 gimplify_assign (unshare_expr (ovf), t, pre_p);
9316 if (container)
9317 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9319 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9320 addr = fold_convert (ptrtype, addr);
9322 if (indirect_p)
9323 addr = build_va_arg_indirect_ref (addr);
9324 return build_va_arg_indirect_ref (addr);
9327 /* Return true if OPNUM's MEM should be matched
9328 in movabs* patterns. */
9330 bool
9331 ix86_check_movabs (rtx insn, int opnum)
9333 rtx set, mem;
9335 set = PATTERN (insn);
9336 if (GET_CODE (set) == PARALLEL)
9337 set = XVECEXP (set, 0, 0);
9338 gcc_assert (GET_CODE (set) == SET);
9339 mem = XEXP (set, opnum);
9340 while (GET_CODE (mem) == SUBREG)
9341 mem = SUBREG_REG (mem);
9342 gcc_assert (MEM_P (mem));
9343 return volatile_ok || !MEM_VOLATILE_P (mem);
9346 /* Initialize the table of extra 80387 mathematical constants. */
9348 static void
9349 init_ext_80387_constants (void)
9351 static const char * cst[5] =
9353 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9354 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9355 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9356 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9357 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9359 int i;
9361 for (i = 0; i < 5; i++)
9363 real_from_string (&ext_80387_constants_table[i], cst[i]);
9364 /* Ensure each constant is rounded to XFmode precision. */
9365 real_convert (&ext_80387_constants_table[i],
9366 XFmode, &ext_80387_constants_table[i]);
9369 ext_80387_constants_init = 1;
9372 /* Return non-zero if the constant is something that
9373 can be loaded with a special instruction. */
9376 standard_80387_constant_p (rtx x)
9378 machine_mode mode = GET_MODE (x);
9380 REAL_VALUE_TYPE r;
9382 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9383 return -1;
9385 if (x == CONST0_RTX (mode))
9386 return 1;
9387 if (x == CONST1_RTX (mode))
9388 return 2;
9390 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9392 /* For XFmode constants, try to find a special 80387 instruction when
9393 optimizing for size or on those CPUs that benefit from them. */
9394 if (mode == XFmode
9395 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9397 int i;
9399 if (! ext_80387_constants_init)
9400 init_ext_80387_constants ();
9402 for (i = 0; i < 5; i++)
9403 if (real_identical (&r, &ext_80387_constants_table[i]))
9404 return i + 3;
9407 /* A load of the constant -0.0 or -1.0 will be split into an
9408 fldz;fchs or fld1;fchs sequence. */
9409 if (real_isnegzero (&r))
9410 return 8;
9411 if (real_identical (&r, &dconstm1))
9412 return 9;
9414 return 0;
9417 /* Return the opcode of the special instruction to be used to load
9418 the constant X. */
9420 const char *
9421 standard_80387_constant_opcode (rtx x)
9423 switch (standard_80387_constant_p (x))
9425 case 1:
9426 return "fldz";
9427 case 2:
9428 return "fld1";
9429 case 3:
9430 return "fldlg2";
9431 case 4:
9432 return "fldln2";
9433 case 5:
9434 return "fldl2e";
9435 case 6:
9436 return "fldl2t";
9437 case 7:
9438 return "fldpi";
9439 case 8:
9440 case 9:
9441 return "#";
9442 default:
9443 gcc_unreachable ();
9447 /* Return the CONST_DOUBLE representing the 80387 constant that is
9448 loaded by the specified special instruction. The argument IDX
9449 matches the return value from standard_80387_constant_p. */
9452 standard_80387_constant_rtx (int idx)
9454 int i;
9456 if (! ext_80387_constants_init)
9457 init_ext_80387_constants ();
9459 switch (idx)
9461 case 3:
9462 case 4:
9463 case 5:
9464 case 6:
9465 case 7:
9466 i = idx - 3;
9467 break;
9469 default:
9470 gcc_unreachable ();
9473 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9474 XFmode);
9477 /* Return 1 if X is all zeros and 2 if X is all ones,
9478 in a supported SSE/AVX vector mode. */
9481 standard_sse_constant_p (rtx x)
9483 machine_mode mode;
9485 if (!TARGET_SSE)
9486 return 0;
9488 mode = GET_MODE (x);
9490 if (x == const0_rtx || x == CONST0_RTX (mode))
9491 return 1;
9492 if (vector_all_ones_operand (x, mode))
9493 switch (mode)
9495 case V16QImode:
9496 case V8HImode:
9497 case V4SImode:
9498 case V2DImode:
9499 if (TARGET_SSE2)
9500 return 2;
9501 case V32QImode:
9502 case V16HImode:
9503 case V8SImode:
9504 case V4DImode:
9505 if (TARGET_AVX2)
9506 return 2;
9507 case V64QImode:
9508 case V32HImode:
9509 case V16SImode:
9510 case V8DImode:
9511 if (TARGET_AVX512F)
9512 return 2;
9513 default:
9514 break;
9517 return 0;
9520 /* Return the opcode of the special instruction to be used to load
9521 the constant X. */
9523 const char *
9524 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9526 switch (standard_sse_constant_p (x))
9528 case 1:
9529 switch (get_attr_mode (insn))
9531 case MODE_XI:
9532 return "vpxord\t%g0, %g0, %g0";
9533 case MODE_V16SF:
9534 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9535 : "vpxord\t%g0, %g0, %g0";
9536 case MODE_V8DF:
9537 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9538 : "vpxorq\t%g0, %g0, %g0";
9539 case MODE_TI:
9540 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9541 : "%vpxor\t%0, %d0";
9542 case MODE_V2DF:
9543 return "%vxorpd\t%0, %d0";
9544 case MODE_V4SF:
9545 return "%vxorps\t%0, %d0";
9547 case MODE_OI:
9548 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9549 : "vpxor\t%x0, %x0, %x0";
9550 case MODE_V4DF:
9551 return "vxorpd\t%x0, %x0, %x0";
9552 case MODE_V8SF:
9553 return "vxorps\t%x0, %x0, %x0";
9555 default:
9556 break;
9559 case 2:
9560 if (TARGET_AVX512VL
9561 || get_attr_mode (insn) == MODE_XI
9562 || get_attr_mode (insn) == MODE_V8DF
9563 || get_attr_mode (insn) == MODE_V16SF)
9564 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9565 if (TARGET_AVX)
9566 return "vpcmpeqd\t%0, %0, %0";
9567 else
9568 return "pcmpeqd\t%0, %0";
9570 default:
9571 break;
9573 gcc_unreachable ();
9576 /* Return true if OP contains a symbol reference. */
9578 bool
9579 symbolic_reference_mentioned_p (rtx op)
9581 const char *fmt;
9582 int i;
9584 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9585 return true;
9587 fmt = GET_RTX_FORMAT (GET_CODE (op));
9588 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9590 if (fmt[i] == 'E')
9592 int j;
9594 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9595 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9596 return true;
9599 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9600 return true;
9603 return false;
9606 /* Return true if it is appropriate to emit `ret' instructions in the
9607 body of a function. Do this only if the epilogue is simple, needing a
9608 couple of insns. Prior to reloading, we can't tell how many registers
9609 must be saved, so return false then. Return false if there is no frame
9610 marker to de-allocate. */
9612 bool
9613 ix86_can_use_return_insn_p (void)
9615 struct ix86_frame frame;
9617 if (! reload_completed || frame_pointer_needed)
9618 return 0;
9620 /* Don't allow more than 32k pop, since that's all we can do
9621 with one instruction. */
9622 if (crtl->args.pops_args && crtl->args.size >= 32768)
9623 return 0;
9625 ix86_compute_frame_layout (&frame);
9626 return (frame.stack_pointer_offset == UNITS_PER_WORD
9627 && (frame.nregs + frame.nsseregs) == 0);
9630 /* Value should be nonzero if functions must have frame pointers.
9631 Zero means the frame pointer need not be set up (and parms may
9632 be accessed via the stack pointer) in functions that seem suitable. */
9634 static bool
9635 ix86_frame_pointer_required (void)
9637 /* If we accessed previous frames, then the generated code expects
9638 to be able to access the saved ebp value in our frame. */
9639 if (cfun->machine->accesses_prev_frame)
9640 return true;
9642 /* Several x86 OSes need a frame pointer for other reasons,
9643 usually pertaining to setjmp. */
9644 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9645 return true;
9647 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9648 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9649 return true;
9651 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9652 stack allocation is 4GB. */
9653 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9654 return true;
9656 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9657 turns off the frame pointer by default. Turn it back on now if
9658 we've not got a leaf function. */
9659 if (TARGET_OMIT_LEAF_FRAME_POINTER
9660 && (!crtl->is_leaf
9661 || ix86_current_function_calls_tls_descriptor))
9662 return true;
9664 if (crtl->profile && !flag_fentry)
9665 return true;
9667 return false;
9670 /* Record that the current function accesses previous call frames. */
9672 void
9673 ix86_setup_frame_addresses (void)
9675 cfun->machine->accesses_prev_frame = 1;
9678 #ifndef USE_HIDDEN_LINKONCE
9679 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9680 # define USE_HIDDEN_LINKONCE 1
9681 # else
9682 # define USE_HIDDEN_LINKONCE 0
9683 # endif
9684 #endif
9686 static int pic_labels_used;
9688 /* Fills in the label name that should be used for a pc thunk for
9689 the given register. */
9691 static void
9692 get_pc_thunk_name (char name[32], unsigned int regno)
9694 gcc_assert (!TARGET_64BIT);
9696 if (USE_HIDDEN_LINKONCE)
9697 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9698 else
9699 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9703 /* This function generates code for -fpic that loads %ebx with
9704 the return address of the caller and then returns. */
9706 static void
9707 ix86_code_end (void)
9709 rtx xops[2];
9710 int regno;
9712 for (regno = AX_REG; regno <= SP_REG; regno++)
9714 char name[32];
9715 tree decl;
9717 if (!(pic_labels_used & (1 << regno)))
9718 continue;
9720 get_pc_thunk_name (name, regno);
9722 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9723 get_identifier (name),
9724 build_function_type_list (void_type_node, NULL_TREE));
9725 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9726 NULL_TREE, void_type_node);
9727 TREE_PUBLIC (decl) = 1;
9728 TREE_STATIC (decl) = 1;
9729 DECL_IGNORED_P (decl) = 1;
9731 #if TARGET_MACHO
9732 if (TARGET_MACHO)
9734 switch_to_section (darwin_sections[text_coal_section]);
9735 fputs ("\t.weak_definition\t", asm_out_file);
9736 assemble_name (asm_out_file, name);
9737 fputs ("\n\t.private_extern\t", asm_out_file);
9738 assemble_name (asm_out_file, name);
9739 putc ('\n', asm_out_file);
9740 ASM_OUTPUT_LABEL (asm_out_file, name);
9741 DECL_WEAK (decl) = 1;
9743 else
9744 #endif
9745 if (USE_HIDDEN_LINKONCE)
9747 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9749 targetm.asm_out.unique_section (decl, 0);
9750 switch_to_section (get_named_section (decl, NULL, 0));
9752 targetm.asm_out.globalize_label (asm_out_file, name);
9753 fputs ("\t.hidden\t", asm_out_file);
9754 assemble_name (asm_out_file, name);
9755 putc ('\n', asm_out_file);
9756 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9758 else
9760 switch_to_section (text_section);
9761 ASM_OUTPUT_LABEL (asm_out_file, name);
9764 DECL_INITIAL (decl) = make_node (BLOCK);
9765 current_function_decl = decl;
9766 init_function_start (decl);
9767 first_function_block_is_cold = false;
9768 /* Make sure unwind info is emitted for the thunk if needed. */
9769 final_start_function (emit_barrier (), asm_out_file, 1);
9771 /* Pad stack IP move with 4 instructions (two NOPs count
9772 as one instruction). */
9773 if (TARGET_PAD_SHORT_FUNCTION)
9775 int i = 8;
9777 while (i--)
9778 fputs ("\tnop\n", asm_out_file);
9781 xops[0] = gen_rtx_REG (Pmode, regno);
9782 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9783 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9784 output_asm_insn ("%!ret", NULL);
9785 final_end_function ();
9786 init_insn_lengths ();
9787 free_after_compilation (cfun);
9788 set_cfun (NULL);
9789 current_function_decl = NULL;
9792 if (flag_split_stack)
9793 file_end_indicate_split_stack ();
9796 /* Emit code for the SET_GOT patterns. */
9798 const char *
9799 output_set_got (rtx dest, rtx label)
9801 rtx xops[3];
9803 xops[0] = dest;
9805 if (TARGET_VXWORKS_RTP && flag_pic)
9807 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9808 xops[2] = gen_rtx_MEM (Pmode,
9809 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9810 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9812 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9813 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9814 an unadorned address. */
9815 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9816 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9817 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9818 return "";
9821 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9823 if (!flag_pic)
9825 if (TARGET_MACHO)
9826 /* We don't need a pic base, we're not producing pic. */
9827 gcc_unreachable ();
9829 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9830 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9831 targetm.asm_out.internal_label (asm_out_file, "L",
9832 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9834 else
9836 char name[32];
9837 get_pc_thunk_name (name, REGNO (dest));
9838 pic_labels_used |= 1 << REGNO (dest);
9840 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9841 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9842 output_asm_insn ("%!call\t%X2", xops);
9844 #if TARGET_MACHO
9845 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9846 This is what will be referenced by the Mach-O PIC subsystem. */
9847 if (machopic_should_output_picbase_label () || !label)
9848 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9850 /* When we are restoring the pic base at the site of a nonlocal label,
9851 and we decided to emit the pic base above, we will still output a
9852 local label used for calculating the correction offset (even though
9853 the offset will be 0 in that case). */
9854 if (label)
9855 targetm.asm_out.internal_label (asm_out_file, "L",
9856 CODE_LABEL_NUMBER (label));
9857 #endif
9860 if (!TARGET_MACHO)
9861 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9863 return "";
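/* For a typical ia32 -fpic function, the routines above cooperate to
   emit, roughly:

     call  __x86.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk generated by ix86_code_end is simply

     __x86.get_pc_thunk.bx:
     movl  (%esp), %ebx
     ret

   so the PIC register ends up holding the address of the GOT.  */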
9866 /* Generate a "push" pattern for input ARG. */
9868 static rtx
9869 gen_push (rtx arg)
9871 struct machine_function *m = cfun->machine;
9873 if (m->fs.cfa_reg == stack_pointer_rtx)
9874 m->fs.cfa_offset += UNITS_PER_WORD;
9875 m->fs.sp_offset += UNITS_PER_WORD;
9877 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9878 arg = gen_rtx_REG (word_mode, REGNO (arg));
9880 return gen_rtx_SET (gen_rtx_MEM (word_mode,
9881 gen_rtx_PRE_DEC (Pmode,
9882 stack_pointer_rtx)),
9883 arg);
9886 /* Generate a "pop" pattern for input ARG. */
9888 static rtx
9889 gen_pop (rtx arg)
9891 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9892 arg = gen_rtx_REG (word_mode, REGNO (arg));
9894 return gen_rtx_SET (arg,
9895 gen_rtx_MEM (word_mode,
9896 gen_rtx_POST_INC (Pmode,
9897 stack_pointer_rtx)));
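/* As a sketch, the two helpers above produce RTL of the form

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI arg))   ;; gen_push
     (set (reg:DI arg) (mem:DI (post_inc:DI (reg:DI sp))))  ;; gen_pop

   with DImode replaced by SImode on 32-bit targets, since both use
   word_mode for the memory access and Pmode for the address update.  */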
9900 /* Return >= 0 if there is an unused call-clobbered register available
9901 for the entire function. */
9903 static unsigned int
9904 ix86_select_alt_pic_regnum (void)
9906 if (ix86_use_pseudo_pic_reg ())
9907 return INVALID_REGNUM;
9909 if (crtl->is_leaf
9910 && !crtl->profile
9911 && !ix86_current_function_calls_tls_descriptor)
9913 int i, drap;
9914 /* Can't use the same register for both PIC and DRAP. */
9915 if (crtl->drap_reg)
9916 drap = REGNO (crtl->drap_reg);
9917 else
9918 drap = -1;
9919 for (i = 2; i >= 0; --i)
9920 if (i != drap && !df_regs_ever_live_p (i))
9921 return i;
9924 return INVALID_REGNUM;
9927 /* Return TRUE if we need to save REGNO. */
9929 static bool
9930 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9932 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9933 && pic_offset_table_rtx)
9935 if (ix86_use_pseudo_pic_reg ())
9937 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9938 _mcount in prologue. */
9939 if (!TARGET_64BIT && flag_pic && crtl->profile)
9940 return true;
9942 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9943 || crtl->profile
9944 || crtl->calls_eh_return
9945 || crtl->uses_const_pool
9946 || cfun->has_nonlocal_label)
9947 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9950 if (crtl->calls_eh_return && maybe_eh_return)
9952 unsigned i;
9953 for (i = 0; ; i++)
9955 unsigned test = EH_RETURN_DATA_REGNO (i);
9956 if (test == INVALID_REGNUM)
9957 break;
9958 if (test == regno)
9959 return true;
9963 if (crtl->drap_reg
9964 && regno == REGNO (crtl->drap_reg)
9965 && !cfun->machine->no_drap_save_restore)
9966 return true;
9968 return (df_regs_ever_live_p (regno)
9969 && !call_used_regs[regno]
9970 && !fixed_regs[regno]
9971 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9974 /* Return the number of saved general purpose registers. */
9976 static int
9977 ix86_nsaved_regs (void)
9979 int nregs = 0;
9980 int regno;
9982 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9983 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9984 nregs ++;
9985 return nregs;
9988 /* Return the number of saved SSE registers. */
9990 static int
9991 ix86_nsaved_sseregs (void)
9993 int nregs = 0;
9994 int regno;
9996 if (!TARGET_64BIT_MS_ABI)
9997 return 0;
9998 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9999 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10000 nregs ++;
10001 return nregs;
10004 /* Given FROM and TO register numbers, say whether this elimination is
10005 allowed. If stack alignment is needed, we can only replace argument
10006 pointer with hard frame pointer, or replace frame pointer with stack
10007 pointer. Otherwise, frame pointer elimination is automatically
10008 handled and all other eliminations are valid. */
10010 static bool
10011 ix86_can_eliminate (const int from, const int to)
10013 if (stack_realign_fp)
10014 return ((from == ARG_POINTER_REGNUM
10015 && to == HARD_FRAME_POINTER_REGNUM)
10016 || (from == FRAME_POINTER_REGNUM
10017 && to == STACK_POINTER_REGNUM));
10018 else
10019 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10022 /* Return the offset between two registers, one to be eliminated, and the other
10023 its replacement, at the start of a routine. */
10025 HOST_WIDE_INT
10026 ix86_initial_elimination_offset (int from, int to)
10028 struct ix86_frame frame;
10029 ix86_compute_frame_layout (&frame);
10031 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10032 return frame.hard_frame_pointer_offset;
10033 else if (from == FRAME_POINTER_REGNUM
10034 && to == HARD_FRAME_POINTER_REGNUM)
10035 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10036 else
10038 gcc_assert (to == STACK_POINTER_REGNUM);
10040 if (from == ARG_POINTER_REGNUM)
10041 return frame.stack_pointer_offset;
10043 gcc_assert (from == FRAME_POINTER_REGNUM);
10044 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10048 /* In a dynamically-aligned function, we can't know the offset from
10049 stack pointer to frame pointer, so we must ensure that setjmp
10050 eliminates fp against the hard fp (%ebp) rather than trying to
10051 index from %esp up to the top of the frame across a gap that is
10052 of unknown (at compile-time) size. */
10053 static rtx
10054 ix86_builtin_setjmp_frame_value (void)
10056 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10059 /* When using -fsplit-stack, the allocation routines set a field in
10060 the TCB to the bottom of the stack plus this much space, measured
10061 in bytes. */
10063 #define SPLIT_STACK_AVAILABLE 256
10065 /* Fill in structure ix86_frame describing the frame of the currently compiled function. */
10067 static void
10068 ix86_compute_frame_layout (struct ix86_frame *frame)
10070 unsigned HOST_WIDE_INT stack_alignment_needed;
10071 HOST_WIDE_INT offset;
10072 unsigned HOST_WIDE_INT preferred_alignment;
10073 HOST_WIDE_INT size = get_frame_size ();
10074 HOST_WIDE_INT to_allocate;
10076 frame->nregs = ix86_nsaved_regs ();
10077 frame->nsseregs = ix86_nsaved_sseregs ();
10079 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
10080 except for function prologues and leaf functions. */
10081 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10082 && (!crtl->is_leaf || cfun->calls_alloca != 0
10083 || ix86_current_function_calls_tls_descriptor))
10085 crtl->preferred_stack_boundary = 128;
10086 crtl->stack_alignment_needed = 128;
10088 /* preferred_stack_boundary is never updated for a call
10089 expanded from a TLS descriptor. Update it here. We don't update it at
10090 expand time because, according to the comments before
10091 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10092 away. */
10093 else if (ix86_current_function_calls_tls_descriptor
10094 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10096 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10097 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10098 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10101 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10102 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10104 gcc_assert (!size || stack_alignment_needed);
10105 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10106 gcc_assert (preferred_alignment <= stack_alignment_needed);
10108 /* For SEH we have to limit the amount of code movement into the prologue.
10109 At present we do this via a BLOCKAGE, at which point there's very little
10110 scheduling that can be done, which means that there's very little point
10111 in doing anything except PUSHs. */
10112 if (TARGET_SEH)
10113 cfun->machine->use_fast_prologue_epilogue = false;
10115 /* During reload iteration the number of registers saved can change.
10116 Recompute the value as needed. Do not recompute when the number of registers
10117 didn't change, as reload does multiple calls to the function and does not
10118 expect the decision to change within a single iteration. */
10119 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10120 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10122 int count = frame->nregs;
10123 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10125 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10127 /* The fast prologue uses move instead of push to save registers. This
10128 is significantly longer, but also executes faster as modern hardware
10129 can execute the moves in parallel, but can't do that for push/pop.
10131 Be careful about choosing which prologue to emit: when the function takes
10132 many instructions to execute, we may use the slow version, as well as when
10133 the function is known to be outside a hot spot (this is known with
10134 feedback only). Weight the size of the function by the number of registers
10135 to save, as it is cheap to use one or two push instructions but very
10136 slow to use many of them. */
10137 if (count)
10138 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10139 if (node->frequency < NODE_FREQUENCY_NORMAL
10140 || (flag_branch_probabilities
10141 && node->frequency < NODE_FREQUENCY_HOT))
10142 cfun->machine->use_fast_prologue_epilogue = false;
10143 else
10144 cfun->machine->use_fast_prologue_epilogue
10145 = !expensive_function_p (count);
10148 frame->save_regs_using_mov
10149 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10150 /* If static stack checking is enabled and done with probes,
10151 the registers need to be saved before allocating the frame. */
10152 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10154 /* Skip return address. */
10155 offset = UNITS_PER_WORD;
10157 /* Skip pushed static chain. */
10158 if (ix86_static_chain_on_stack)
10159 offset += UNITS_PER_WORD;
10161 /* Skip saved base pointer. */
10162 if (frame_pointer_needed)
10163 offset += UNITS_PER_WORD;
10164 frame->hfp_save_offset = offset;
10166 /* The traditional frame pointer location is at the top of the frame. */
10167 frame->hard_frame_pointer_offset = offset;
10169 /* Register save area */
10170 offset += frame->nregs * UNITS_PER_WORD;
10171 frame->reg_save_offset = offset;
10173 /* On SEH target, registers are pushed just before the frame pointer
10174 location. */
10175 if (TARGET_SEH)
10176 frame->hard_frame_pointer_offset = offset;
10178 /* Align and set SSE register save area. */
10179 if (frame->nsseregs)
10181 /* The only ABI that has saved SSE registers (Win64) also has a
10182 16-byte aligned default stack, and thus we don't need to be
10183 within the re-aligned local stack frame to save them. */
10184 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10185 offset = (offset + 16 - 1) & -16;
10186 offset += frame->nsseregs * 16;
10188 frame->sse_reg_save_offset = offset;
10190 /* The re-aligned stack starts here. Values before this point are not
10191 directly comparable with values below this point. In order to make
10192 sure that no value happens to be the same before and after, force
10193 the alignment computation below to add a non-zero value. */
10194 if (stack_realign_fp)
10195 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10197 /* Va-arg area */
10198 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10199 offset += frame->va_arg_size;
10201 /* Align start of frame for local function. */
10202 if (stack_realign_fp
10203 || offset != frame->sse_reg_save_offset
10204 || size != 0
10205 || !crtl->is_leaf
10206 || cfun->calls_alloca
10207 || ix86_current_function_calls_tls_descriptor)
10208 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10210 /* Frame pointer points here. */
10211 frame->frame_pointer_offset = offset;
10213 offset += size;
10215 /* Add the outgoing arguments area. It can be skipped if we eliminated
10216 all the function calls as dead code.
10217 Skipping is however impossible when the function calls alloca. The alloca
10218 expander assumes that the last crtl->outgoing_args_size bytes
10219 of the stack frame are unused. */
10220 if (ACCUMULATE_OUTGOING_ARGS
10221 && (!crtl->is_leaf || cfun->calls_alloca
10222 || ix86_current_function_calls_tls_descriptor))
10224 offset += crtl->outgoing_args_size;
10225 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10227 else
10228 frame->outgoing_arguments_size = 0;
10230 /* Align stack boundary. Only needed if we're calling another function
10231 or using alloca. */
10232 if (!crtl->is_leaf || cfun->calls_alloca
10233 || ix86_current_function_calls_tls_descriptor)
10234 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10236 /* We've reached end of stack frame. */
10237 frame->stack_pointer_offset = offset;
10239 /* Size prologue needs to allocate. */
10240 to_allocate = offset - frame->sse_reg_save_offset;
10242 if ((!to_allocate && frame->nregs <= 1)
10243 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10244 frame->save_regs_using_mov = false;
10246 if (ix86_using_red_zone ()
10247 && crtl->sp_is_unchanging
10248 && crtl->is_leaf
10249 && !ix86_current_function_calls_tls_descriptor)
10251 frame->red_zone_size = to_allocate;
10252 if (frame->save_regs_using_mov)
10253 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10254 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10255 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10257 else
10258 frame->red_zone_size = 0;
10259 frame->stack_pointer_offset -= frame->red_zone_size;
10261 /* The SEH frame pointer location is near the bottom of the frame.
10262 This is enforced by the fact that the difference between the
10263 stack pointer and the frame pointer is limited to 240 bytes in
10264 the unwind data structure. */
10265 if (TARGET_SEH)
10267 HOST_WIDE_INT diff;
10269 /* If we can leave the frame pointer where it is, do so. Also, returns
10270 the establisher frame for __builtin_frame_address (0). */
10271 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10272 if (diff <= SEH_MAX_FRAME_SIZE
10273 && (diff > 240 || (diff & 15) != 0)
10274 && !crtl->accesses_prior_frames)
10276 /* Ideally we'd determine what portion of the local stack frame
10277 (within the constraint of the lowest 240) is most heavily used.
10278 But without that complication, simply bias the frame pointer
10279 by 128 bytes so as to maximize the amount of the local stack
10280 frame that is addressable with 8-bit offsets. */
10281 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
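/* Roughly, the offsets computed above describe a frame laid out as
   follows, going from the return address towards lower addresses:

     return address / pushed static chain / saved %ebp|%rbp
                                          <- hfp_save_offset, hard_frame_pointer_offset
     saved general purpose registers      <- reg_save_offset
     saved SSE registers (64-bit MS ABI)  <- sse_reg_save_offset
     varargs register save area
     local variables                      <- frame_pointer_offset
     outgoing arguments
     end of frame                         <- stack_pointer_offset

   Under SEH the hard frame pointer may instead be biased towards the
   bottom of the frame, as handled just above.  */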
10286 /* This is semi-inlined memory_address_length, but simplified
10287 since we know that we're always dealing with reg+offset, and
10288 to avoid having to create and discard all that rtl. */
10290 static inline int
10291 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10293 int len = 4;
10295 if (offset == 0)
10297 /* EBP and R13 cannot be encoded without an offset. */
10298 len = (regno == BP_REG || regno == R13_REG);
10300 else if (IN_RANGE (offset, -128, 127))
10301 len = 1;
10303 /* ESP and R12 must be encoded with a SIB byte. */
10304 if (regno == SP_REG || regno == R12_REG)
10305 len++;
10307 return len;
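/* A few examples of the encoding lengths returned above (a sketch,
   counting only displacement and SIB bytes): (%rax) -> 0; (%rbp) -> 1,
   because EBP/R13 need a disp8; 8(%rsp) -> 2, because ESP/R12 need a SIB
   byte on top of the disp8; and 1024(%rax) -> 4 for a full disp32.  */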
10310 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10311 The valid base registers are taken from CFUN->MACHINE->FS. */
10313 static rtx
10314 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10316 const struct machine_function *m = cfun->machine;
10317 rtx base_reg = NULL;
10318 HOST_WIDE_INT base_offset = 0;
10320 if (m->use_fast_prologue_epilogue)
10322 /* Choose the base register most likely to allow the most scheduling
10323 opportunities. Generally FP is valid throughout the function,
10324 while DRAP must be reloaded within the epilogue. But choose either
10325 over the SP due to increased encoding size. */
10327 if (m->fs.fp_valid)
10329 base_reg = hard_frame_pointer_rtx;
10330 base_offset = m->fs.fp_offset - cfa_offset;
10332 else if (m->fs.drap_valid)
10334 base_reg = crtl->drap_reg;
10335 base_offset = 0 - cfa_offset;
10337 else if (m->fs.sp_valid)
10339 base_reg = stack_pointer_rtx;
10340 base_offset = m->fs.sp_offset - cfa_offset;
10343 else
10345 HOST_WIDE_INT toffset;
10346 int len = 16, tlen;
10348 /* Choose the base register with the smallest address encoding.
10349 With a tie, choose FP > DRAP > SP. */
10350 if (m->fs.sp_valid)
10352 base_reg = stack_pointer_rtx;
10353 base_offset = m->fs.sp_offset - cfa_offset;
10354 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10356 if (m->fs.drap_valid)
10358 toffset = 0 - cfa_offset;
10359 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10360 if (tlen <= len)
10362 base_reg = crtl->drap_reg;
10363 base_offset = toffset;
10364 len = tlen;
10367 if (m->fs.fp_valid)
10369 toffset = m->fs.fp_offset - cfa_offset;
10370 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10371 if (tlen <= len)
10373 base_reg = hard_frame_pointer_rtx;
10374 base_offset = toffset;
10375 len = tlen;
10379 gcc_assert (base_reg != NULL);
10381 return plus_constant (Pmode, base_reg, base_offset);
10384 /* Emit code to save registers in the prologue. */
10386 static void
10387 ix86_emit_save_regs (void)
10389 unsigned int regno;
10390 rtx_insn *insn;
10392 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10393 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10395 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10396 RTX_FRAME_RELATED_P (insn) = 1;
10400 /* Emit a single register save at CFA - CFA_OFFSET. */
10402 static void
10403 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10404 HOST_WIDE_INT cfa_offset)
10406 struct machine_function *m = cfun->machine;
10407 rtx reg = gen_rtx_REG (mode, regno);
10408 rtx mem, addr, base, insn;
10410 addr = choose_baseaddr (cfa_offset);
10411 mem = gen_frame_mem (mode, addr);
10413 /* For SSE saves, we need to indicate the 128-bit alignment. */
10414 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10416 insn = emit_move_insn (mem, reg);
10417 RTX_FRAME_RELATED_P (insn) = 1;
10419 base = addr;
10420 if (GET_CODE (base) == PLUS)
10421 base = XEXP (base, 0);
10422 gcc_checking_assert (REG_P (base));
10424 /* When saving registers into a re-aligned local stack frame, avoid
10425 any tricky guessing by dwarf2out. */
10426 if (m->fs.realigned)
10428 gcc_checking_assert (stack_realign_drap);
10430 if (regno == REGNO (crtl->drap_reg))
10432 /* A bit of a hack. We force the DRAP register to be saved in
10433 the re-aligned stack frame, which provides us with a copy
10434 of the CFA that will last past the prologue. Install it. */
10435 gcc_checking_assert (cfun->machine->fs.fp_valid);
10436 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10437 cfun->machine->fs.fp_offset - cfa_offset);
10438 mem = gen_rtx_MEM (mode, addr);
10439 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10441 else
10443 /* The frame pointer is a stable reference within the
10444 aligned frame. Use it. */
10445 gcc_checking_assert (cfun->machine->fs.fp_valid);
10446 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10447 cfun->machine->fs.fp_offset - cfa_offset);
10448 mem = gen_rtx_MEM (mode, addr);
10449 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10453 /* The memory may not be relative to the current CFA register,
10454 which means that we may need to generate a new pattern for
10455 use by the unwind info. */
10456 else if (base != m->fs.cfa_reg)
10458 addr = plus_constant (Pmode, m->fs.cfa_reg,
10459 m->fs.cfa_offset - cfa_offset);
10460 mem = gen_rtx_MEM (mode, addr);
10461 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10465 /* Emit code to save registers using MOV insns.
10466 First register is stored at CFA - CFA_OFFSET. */
10467 static void
10468 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10470 unsigned int regno;
10472 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10473 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10475 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10476 cfa_offset -= UNITS_PER_WORD;
10480 /* Emit code to save SSE registers using MOV insns.
10481 First register is stored at CFA - CFA_OFFSET. */
10482 static void
10483 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10485 unsigned int regno;
10487 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10488 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10490 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10491 cfa_offset -= 16;
10495 static GTY(()) rtx queued_cfa_restores;
10497 /* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
10498 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10499 Don't add the note if the previously saved value will be left untouched
10500 within the stack red zone until return, as unwinders can find the same
10501 value in the register and on the stack. */
10503 static void
10504 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10506 if (!crtl->shrink_wrapped
10507 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10508 return;
10510 if (insn)
10512 add_reg_note (insn, REG_CFA_RESTORE, reg);
10513 RTX_FRAME_RELATED_P (insn) = 1;
10515 else
10516 queued_cfa_restores
10517 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10520 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10522 static void
10523 ix86_add_queued_cfa_restore_notes (rtx insn)
10525 rtx last;
10526 if (!queued_cfa_restores)
10527 return;
10528 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10530 XEXP (last, 1) = REG_NOTES (insn);
10531 REG_NOTES (insn) = queued_cfa_restores;
10532 queued_cfa_restores = NULL_RTX;
10533 RTX_FRAME_RELATED_P (insn) = 1;
10536 /* Expand prologue or epilogue stack adjustment.
10537 The pattern exists to put a dependency on all ebp-based memory accesses.
10538 STYLE should be negative if instructions should be marked as frame related,
10539 zero if the %r11 register is live and cannot be freely used, and positive
10540 otherwise. */
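/* For example, the prologue paths below call this with STYLE == -1, so the
   stack adjustments are marked frame related, while sibcall epilogues pass
   STYLE == 0 because %r11 may already hold the indirect sibcall target
   (see the callers in ix86_expand_prologue and ix86_expand_epilogue). */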
10542 static void
10543 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10544 int style, bool set_cfa)
10546 struct machine_function *m = cfun->machine;
10547 rtx insn;
10548 bool add_frame_related_expr = false;
10550 if (Pmode == SImode)
10551 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10552 else if (x86_64_immediate_operand (offset, DImode))
10553 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10554 else
10556 rtx tmp;
10557 /* r11 is used by indirect sibcall return as well, set before the
10558 epilogue and used after the epilogue. */
10559 if (style)
10560 tmp = gen_rtx_REG (DImode, R11_REG);
10561 else
10563 gcc_assert (src != hard_frame_pointer_rtx
10564 && dest != hard_frame_pointer_rtx);
10565 tmp = hard_frame_pointer_rtx;
10567 insn = emit_insn (gen_rtx_SET (tmp, offset));
10568 if (style < 0)
10569 add_frame_related_expr = true;
10571 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10574 insn = emit_insn (insn);
10575 if (style >= 0)
10576 ix86_add_queued_cfa_restore_notes (insn);
10578 if (set_cfa)
10580 rtx r;
10582 gcc_assert (m->fs.cfa_reg == src);
10583 m->fs.cfa_offset += INTVAL (offset);
10584 m->fs.cfa_reg = dest;
10586 r = gen_rtx_PLUS (Pmode, src, offset);
10587 r = gen_rtx_SET (dest, r);
10588 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10589 RTX_FRAME_RELATED_P (insn) = 1;
10591 else if (style < 0)
10593 RTX_FRAME_RELATED_P (insn) = 1;
10594 if (add_frame_related_expr)
10596 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10597 r = gen_rtx_SET (dest, r);
10598 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10602 if (dest == stack_pointer_rtx)
10604 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10605 bool valid = m->fs.sp_valid;
10607 if (src == hard_frame_pointer_rtx)
10609 valid = m->fs.fp_valid;
10610 ooffset = m->fs.fp_offset;
10612 else if (src == crtl->drap_reg)
10614 valid = m->fs.drap_valid;
10615 ooffset = 0;
10617 else
10619 /* Else there are two possibilities: SP itself, which we set
10620 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10621 taken care of by hand along the eh_return path. */
10622 gcc_checking_assert (src == stack_pointer_rtx
10623 || offset == const0_rtx);
10626 m->fs.sp_offset = ooffset - INTVAL (offset);
10627 m->fs.sp_valid = valid;
10631 /* Find an available register to be used as dynamic realign argument
10632 pointer register. Such a register will be written in the prologue and
10633 used at the beginning of the body, so it must not be
10634 1. a parameter-passing register.
10635 2. the GOT pointer.
10636 We reuse the static-chain register if it is available. Otherwise, we
10637 use DI for i386 and R13 for x86-64. We chose R13 since it has
10638 shorter encoding.
10640 Return: the regno of chosen register. */
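/* For instance, a 64-bit nested function (DECL_STATIC_CHAIN set) or a
   function that emits tail calls gets R13, while an ordinary 64-bit function
   gets R10; 32-bit code prefers ECX when regparm allows it and the calling
   convention is not fastcall/thiscall, falling back to EDI. */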
10642 static unsigned int
10643 find_drap_reg (void)
10645 tree decl = cfun->decl;
10647 if (TARGET_64BIT)
10649 /* Use R13 for a nested function or a function that needs a static chain.
10650 Since a function with a tail call may use any caller-saved
10651 register in the epilogue, DRAP must not use a caller-saved
10652 register in that case. */
10653 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10654 return R13_REG;
10656 return R10_REG;
10658 else
10660 /* Use DI for a nested function or a function that needs a static chain.
10661 Since a function with a tail call may use any caller-saved
10662 register in the epilogue, DRAP must not use a caller-saved
10663 register in that case. */
10664 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10665 return DI_REG;
10667 /* Reuse static chain register if it isn't used for parameter
10668 passing. */
10669 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10671 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10672 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10673 return CX_REG;
10675 return DI_REG;
10679 /* Return minimum incoming stack alignment. */
10681 static unsigned int
10682 ix86_minimum_incoming_stack_boundary (bool sibcall)
10684 unsigned int incoming_stack_boundary;
10686 /* Prefer the one specified at command line. */
10687 if (ix86_user_incoming_stack_boundary)
10688 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10689 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10690 if -mstackrealign is used, this is not the sibcall check, and the
10691 estimated stack alignment is 128 bits. */
10692 else if (!sibcall
10693 && !TARGET_64BIT
10694 && ix86_force_align_arg_pointer
10695 && crtl->stack_alignment_estimated == 128)
10696 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10697 else
10698 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10700 /* Incoming stack alignment can be changed on individual functions
10701 via force_align_arg_pointer attribute. We use the smallest
10702 incoming stack boundary. */
10703 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10704 && lookup_attribute (ix86_force_align_arg_pointer_string,
10705 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10706 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10708 /* The incoming stack frame has to be aligned at least at
10709 parm_stack_boundary. */
10710 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10711 incoming_stack_boundary = crtl->parm_stack_boundary;
10713 /* The stack at the entry of main is aligned by the runtime. We use the
10714 smallest incoming stack boundary. */
10715 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10716 && DECL_NAME (current_function_decl)
10717 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10718 && DECL_FILE_SCOPE_P (current_function_decl))
10719 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10721 return incoming_stack_boundary;
10724 /* Update incoming stack boundary and estimated stack alignment. */
10726 static void
10727 ix86_update_stack_boundary (void)
10729 ix86_incoming_stack_boundary
10730 = ix86_minimum_incoming_stack_boundary (false);
10732 /* x86_64 varargs needs a 16-byte stack alignment for the register save
10733 area. */
10734 if (TARGET_64BIT
10735 && cfun->stdarg
10736 && crtl->stack_alignment_estimated < 128)
10737 crtl->stack_alignment_estimated = 128;
10740 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10741 needed or an rtx for DRAP otherwise. */
10743 static rtx
10744 ix86_get_drap_rtx (void)
10746 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10747 crtl->need_drap = true;
10749 if (stack_realign_drap)
10751 /* Assign DRAP to vDRAP and return vDRAP. */
10752 unsigned int regno = find_drap_reg ();
10753 rtx drap_vreg;
10754 rtx arg_ptr;
10755 rtx_insn *seq, *insn;
10757 arg_ptr = gen_rtx_REG (Pmode, regno);
10758 crtl->drap_reg = arg_ptr;
10760 start_sequence ();
10761 drap_vreg = copy_to_reg (arg_ptr);
10762 seq = get_insns ();
10763 end_sequence ();
10765 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10766 if (!optimize)
10768 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10769 RTX_FRAME_RELATED_P (insn) = 1;
10771 return drap_vreg;
10773 else
10774 return NULL;
10777 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10779 static rtx
10780 ix86_internal_arg_pointer (void)
10782 return virtual_incoming_args_rtx;
10785 struct scratch_reg {
10786 rtx reg;
10787 bool saved;
10790 /* Return a short-lived scratch register for use on function entry.
10791 In 32-bit mode, it is valid only after the registers are saved
10792 in the prologue. This register must be released by means of
10793 release_scratch_register_on_entry once it is dead. */
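/* Typical use, as in the stack probing routines below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg ...
     release_scratch_register_on_entry (&sr);  */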
10795 static void
10796 get_scratch_register_on_entry (struct scratch_reg *sr)
10798 int regno;
10800 sr->saved = false;
10802 if (TARGET_64BIT)
10804 /* We always use R11 in 64-bit mode. */
10805 regno = R11_REG;
10807 else
10809 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10810 bool fastcall_p
10811 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10812 bool thiscall_p
10813 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10814 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10815 int regparm = ix86_function_regparm (fntype, decl);
10816 int drap_regno
10817 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10819 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10820 for the static chain register. */
10821 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10822 && drap_regno != AX_REG)
10823 regno = AX_REG;
10824 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10825 for the static chain register. */
10826 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10827 regno = AX_REG;
10828 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10829 regno = DX_REG;
10830 /* ecx is the static chain register. */
10831 else if (regparm < 3 && !fastcall_p && !thiscall_p
10832 && !static_chain_p
10833 && drap_regno != CX_REG)
10834 regno = CX_REG;
10835 else if (ix86_save_reg (BX_REG, true))
10836 regno = BX_REG;
10837 /* esi is the static chain register. */
10838 else if (!(regparm == 3 && static_chain_p)
10839 && ix86_save_reg (SI_REG, true))
10840 regno = SI_REG;
10841 else if (ix86_save_reg (DI_REG, true))
10842 regno = DI_REG;
10843 else
10845 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10846 sr->saved = true;
10850 sr->reg = gen_rtx_REG (Pmode, regno);
10851 if (sr->saved)
10853 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10854 RTX_FRAME_RELATED_P (insn) = 1;
10858 /* Release a scratch register obtained from the preceding function. */
10860 static void
10861 release_scratch_register_on_entry (struct scratch_reg *sr)
10863 if (sr->saved)
10865 struct machine_function *m = cfun->machine;
10866 rtx x, insn = emit_insn (gen_pop (sr->reg));
10868 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10869 RTX_FRAME_RELATED_P (insn) = 1;
10870 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10871 x = gen_rtx_SET (stack_pointer_rtx, x);
10872 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10873 m->fs.sp_offset -= UNITS_PER_WORD;
10877 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10879 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10881 static void
10882 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10884 /* We skip the probe for the first interval + a small dope of 4 words and
10885 probe that many bytes past the specified size to maintain a protection
10886 area at the bottom of the stack. */
10887 const int dope = 4 * UNITS_PER_WORD;
10888 rtx size_rtx = GEN_INT (size), last;
10890 /* See if we have a constant small number of probes to generate. If so,
10891 that's the easy case. The run-time loop is made up of 11 insns in the
10892 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10893 for n # of intervals. */
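/* As a worked example, assuming PROBE_INTERVAL is 4096 bytes
   (STACK_CHECK_PROBE_INTERVAL_EXP == 12) and UNITS_PER_WORD is 8 (dope of
   32 bytes), a SIZE of 10240 takes the constant path below: sub $8224 and
   probe, sub $4096 and probe, sub $2048 and probe, then the final add of
   $4128 leaves the required net adjustment of -10240. */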
10894 if (size <= 5 * PROBE_INTERVAL)
10896 HOST_WIDE_INT i, adjust;
10897 bool first_probe = true;
10899 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10900 values of N from 1 until it exceeds SIZE. If only one probe is
10901 needed, this will not generate any code. Then adjust and probe
10902 to PROBE_INTERVAL + SIZE. */
10903 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10905 if (first_probe)
10907 adjust = 2 * PROBE_INTERVAL + dope;
10908 first_probe = false;
10910 else
10911 adjust = PROBE_INTERVAL;
10913 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10914 plus_constant (Pmode, stack_pointer_rtx,
10915 -adjust)));
10916 emit_stack_probe (stack_pointer_rtx);
10919 if (first_probe)
10920 adjust = size + PROBE_INTERVAL + dope;
10921 else
10922 adjust = size + PROBE_INTERVAL - i;
10924 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10925 plus_constant (Pmode, stack_pointer_rtx,
10926 -adjust)));
10927 emit_stack_probe (stack_pointer_rtx);
10929 /* Adjust back to account for the additional first interval. */
10930 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
10931 plus_constant (Pmode, stack_pointer_rtx,
10932 PROBE_INTERVAL + dope)));
10935 /* Otherwise, do the same as above, but in a loop. Note that we must be
10936 extra careful with variables wrapping around because we might be at
10937 the very top (or the very bottom) of the address space and we have
10938 to be able to handle this case properly; in particular, we use an
10939 equality test for the loop condition. */
10940 else
10942 HOST_WIDE_INT rounded_size;
10943 struct scratch_reg sr;
10945 get_scratch_register_on_entry (&sr);
10948 /* Step 1: round SIZE to the previous multiple of the interval. */
10950 rounded_size = size & -PROBE_INTERVAL;
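/* E.g. a SIZE of 10000 with a 4096-byte interval rounds down to 8192;
   PROBE_INTERVAL is a power of two, so the AND simply masks the low bits. */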
10953 /* Step 2: compute initial and final value of the loop counter. */
10955 /* SP = SP_0 + PROBE_INTERVAL. */
10956 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10957 plus_constant (Pmode, stack_pointer_rtx,
10958 - (PROBE_INTERVAL + dope))));
10960 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10961 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10962 emit_insn (gen_rtx_SET (sr.reg,
10963 gen_rtx_PLUS (Pmode, sr.reg,
10964 stack_pointer_rtx)));
10967 /* Step 3: the loop
10969 while (SP != LAST_ADDR)
10971 SP = SP + PROBE_INTERVAL
10972 probe at SP
10975 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10976 values of N from 1 until it is equal to ROUNDED_SIZE. */
10978 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10981 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10982 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10984 if (size != rounded_size)
10986 emit_insn (gen_rtx_SET (stack_pointer_rtx,
10987 plus_constant (Pmode, stack_pointer_rtx,
10988 rounded_size - size)));
10989 emit_stack_probe (stack_pointer_rtx);
10992 /* Adjust back to account for the additional first interval. */
10993 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
10994 plus_constant (Pmode, stack_pointer_rtx,
10995 PROBE_INTERVAL + dope)));
10997 release_scratch_register_on_entry (&sr);
11000 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11002 /* Even if the stack pointer isn't the CFA register, we need to correctly
11003 describe the adjustments made to it, in particular differentiate the
11004 frame-related ones from the frame-unrelated ones. */
11005 if (size > 0)
11007 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11008 XVECEXP (expr, 0, 0)
11009 = gen_rtx_SET (stack_pointer_rtx,
11010 plus_constant (Pmode, stack_pointer_rtx, -size));
11011 XVECEXP (expr, 0, 1)
11012 = gen_rtx_SET (stack_pointer_rtx,
11013 plus_constant (Pmode, stack_pointer_rtx,
11014 PROBE_INTERVAL + dope + size));
11015 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11016 RTX_FRAME_RELATED_P (last) = 1;
11018 cfun->machine->fs.sp_offset += size;
11021 /* Make sure nothing is scheduled before we are done. */
11022 emit_insn (gen_blockage ());
11025 /* Adjust the stack pointer up to REG while probing it. */
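/* The emitted loop looks roughly like this (AT&T syntax, 64-bit, assuming a
   4096-byte PROBE_INTERVAL and %r11 as the scratch register):

	.LPSRL0:	cmpq	%r11, %rsp
			je	.LPSRE0
			subq	$4096, %rsp
			orq	$0, (%rsp)
			jmp	.LPSRL0
	.LPSRE0:  */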
11027 const char *
11028 output_adjust_stack_and_probe (rtx reg)
11030 static int labelno = 0;
11031 char loop_lab[32], end_lab[32];
11032 rtx xops[2];
11034 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11035 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11037 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11039 /* Jump to END_LAB if SP == LAST_ADDR. */
11040 xops[0] = stack_pointer_rtx;
11041 xops[1] = reg;
11042 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11043 fputs ("\tje\t", asm_out_file);
11044 assemble_name_raw (asm_out_file, end_lab);
11045 fputc ('\n', asm_out_file);
11047 /* SP = SP + PROBE_INTERVAL. */
11048 xops[1] = GEN_INT (PROBE_INTERVAL);
11049 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11051 /* Probe at SP. */
11052 xops[1] = const0_rtx;
11053 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11055 fprintf (asm_out_file, "\tjmp\t");
11056 assemble_name_raw (asm_out_file, loop_lab);
11057 fputc ('\n', asm_out_file);
11059 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11061 return "";
11064 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11065 inclusive. These are offsets from the current stack pointer. */
11067 static void
11068 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11070 /* See if we have a constant small number of probes to generate. If so,
11071 that's the easy case. The run-time loop is made up of 7 insns in the
11072 generic case while the compile-time loop is made up of n insns for n #
11073 of intervals. */
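/* For instance, with FIRST == 4096, SIZE == 12288 and a 4096-byte interval,
   the constant path emits probes at sp-8192, sp-12288 and sp-16384. */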
11074 if (size <= 7 * PROBE_INTERVAL)
11076 HOST_WIDE_INT i;
11078 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11079 it exceeds SIZE. If only one probe is needed, this will not
11080 generate any code. Then probe at FIRST + SIZE. */
11081 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11082 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11083 -(first + i)));
11085 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11086 -(first + size)));
11089 /* Otherwise, do the same as above, but in a loop. Note that we must be
11090 extra careful with variables wrapping around because we might be at
11091 the very top (or the very bottom) of the address space and we have
11092 to be able to handle this case properly; in particular, we use an
11093 equality test for the loop condition. */
11094 else
11096 HOST_WIDE_INT rounded_size, last;
11097 struct scratch_reg sr;
11099 get_scratch_register_on_entry (&sr);
11102 /* Step 1: round SIZE to the previous multiple of the interval. */
11104 rounded_size = size & -PROBE_INTERVAL;
11107 /* Step 2: compute initial and final value of the loop counter. */
11109 /* TEST_OFFSET = FIRST. */
11110 emit_move_insn (sr.reg, GEN_INT (-first));
11112 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11113 last = first + rounded_size;
11116 /* Step 3: the loop
11118 while (TEST_ADDR != LAST_ADDR)
11120 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11121 probe at TEST_ADDR
11124 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11125 until it is equal to ROUNDED_SIZE. */
11127 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11130 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11131 that SIZE is equal to ROUNDED_SIZE. */
11133 if (size != rounded_size)
11134 emit_stack_probe (plus_constant (Pmode,
11135 gen_rtx_PLUS (Pmode,
11136 stack_pointer_rtx,
11137 sr.reg),
11138 rounded_size - size));
11140 release_scratch_register_on_entry (&sr);
11143 /* Make sure nothing is scheduled before we are done. */
11144 emit_insn (gen_blockage ());
11147 /* Probe a range of stack addresses from REG to END, inclusive. These are
11148 offsets from the current stack pointer. */
11150 const char *
11151 output_probe_stack_range (rtx reg, rtx end)
11153 static int labelno = 0;
11154 char loop_lab[32], end_lab[32];
11155 rtx xops[3];
11157 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11158 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11162 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11163 xops[0] = reg;
11164 xops[1] = end;
11165 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11166 fputs ("\tje\t", asm_out_file);
11167 assemble_name_raw (asm_out_file, end_lab);
11168 fputc ('\n', asm_out_file);
11170 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11171 xops[1] = GEN_INT (PROBE_INTERVAL);
11172 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11174 /* Probe at TEST_ADDR. */
11175 xops[0] = stack_pointer_rtx;
11176 xops[1] = reg;
11177 xops[2] = const0_rtx;
11178 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11180 fprintf (asm_out_file, "\tjmp\t");
11181 assemble_name_raw (asm_out_file, loop_lab);
11182 fputc ('\n', asm_out_file);
11184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11186 return "";
11189 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
11190 so that it is generated in the correct form. */
11191 static void
11192 ix86_finalize_stack_realign_flags (void)
11194 /* Check if stack realignment is really needed after reload, and
11195 store the result in cfun. */
11196 unsigned int incoming_stack_boundary
11197 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11198 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11199 unsigned int stack_realign = (incoming_stack_boundary
11200 < (crtl->is_leaf
11201 ? crtl->max_used_stack_slot_alignment
11202 : crtl->stack_alignment_needed));
11204 if (crtl->stack_realign_finalized)
11206 /* After stack_realign_needed is finalized, we can no longer
11207 change it. */
11208 gcc_assert (crtl->stack_realign_needed == stack_realign);
11209 return;
11212 /* If the only reason for frame_pointer_needed is that we conservatively
11213 assumed stack realignment might be needed, but in the end nothing that
11214 needed the stack alignment had been spilled, clear frame_pointer_needed
11215 and say we don't need stack realignment. */
11216 if (stack_realign
11217 && frame_pointer_needed
11218 && crtl->is_leaf
11219 && flag_omit_frame_pointer
11220 && crtl->sp_is_unchanging
11221 && !ix86_current_function_calls_tls_descriptor
11222 && !crtl->accesses_prior_frames
11223 && !cfun->calls_alloca
11224 && !crtl->calls_eh_return
11225 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11226 && !ix86_frame_pointer_required ()
11227 && get_frame_size () == 0
11228 && ix86_nsaved_sseregs () == 0
11229 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11231 HARD_REG_SET set_up_by_prologue, prologue_used;
11232 basic_block bb;
11234 CLEAR_HARD_REG_SET (prologue_used);
11235 CLEAR_HARD_REG_SET (set_up_by_prologue);
11236 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11237 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11238 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11239 HARD_FRAME_POINTER_REGNUM);
11240 FOR_EACH_BB_FN (bb, cfun)
11242 rtx_insn *insn;
11243 FOR_BB_INSNS (bb, insn)
11244 if (NONDEBUG_INSN_P (insn)
11245 && requires_stack_frame_p (insn, prologue_used,
11246 set_up_by_prologue))
11248 crtl->stack_realign_needed = stack_realign;
11249 crtl->stack_realign_finalized = true;
11250 return;
11254 /* If drap has been set, but it actually isn't live at the start
11255 of the function, there is no reason to set it up. */
11256 if (crtl->drap_reg)
11258 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11259 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11261 crtl->drap_reg = NULL_RTX;
11262 crtl->need_drap = false;
11265 else
11266 cfun->machine->no_drap_save_restore = true;
11268 frame_pointer_needed = false;
11269 stack_realign = false;
11270 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11271 crtl->stack_alignment_needed = incoming_stack_boundary;
11272 crtl->stack_alignment_estimated = incoming_stack_boundary;
11273 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11274 crtl->preferred_stack_boundary = incoming_stack_boundary;
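/* frame_pointer_needed was just cleared above, so rerun the dataflow
   passes to recompute register liveness with the hard frame pointer
   available as an ordinary register again. */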
11275 df_finish_pass (true);
11276 df_scan_alloc (NULL);
11277 df_scan_blocks ();
11278 df_compute_regs_ever_live (true);
11279 df_analyze ();
11282 crtl->stack_realign_needed = stack_realign;
11283 crtl->stack_realign_finalized = true;
11286 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11288 static void
11289 ix86_elim_entry_set_got (rtx reg)
11291 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11292 rtx_insn *c_insn = BB_HEAD (bb);
11293 if (!NONDEBUG_INSN_P (c_insn))
11294 c_insn = next_nonnote_nondebug_insn (c_insn);
11295 if (c_insn && NONJUMP_INSN_P (c_insn))
11297 rtx pat = PATTERN (c_insn);
11298 if (GET_CODE (pat) == PARALLEL)
11300 rtx vec = XVECEXP (pat, 0, 0);
11301 if (GET_CODE (vec) == SET
11302 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11303 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11304 delete_insn (c_insn);
11309 /* Expand the prologue into a bunch of separate insns. */
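/* Roughly, the order of work below is: handle the ms_hook_prologue and
   static-chain-on-stack special cases, set up DRAP and realign when
   stack_realign_drap, push the frame pointer and the integer registers
   (or save them with moves), realign for stack_realign_fp, allocate and
   possibly probe the remaining frame, save any SSE registers, and finally
   emit SET_GOT / CLD / SEH blockages as needed. */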
11311 void
11312 ix86_expand_prologue (void)
11314 struct machine_function *m = cfun->machine;
11315 rtx insn, t;
11316 struct ix86_frame frame;
11317 HOST_WIDE_INT allocate;
11318 bool int_registers_saved;
11319 bool sse_registers_saved;
11321 ix86_finalize_stack_realign_flags ();
11323 /* DRAP should not coexist with stack_realign_fp. */
11324 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11326 memset (&m->fs, 0, sizeof (m->fs));
11328 /* Initialize CFA state for before the prologue. */
11329 m->fs.cfa_reg = stack_pointer_rtx;
11330 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11332 /* Track SP offset to the CFA. We continue tracking this after we've
11333 swapped the CFA register away from SP. In the case of re-alignment
11334 this is fudged; we're interested in offsets within the local frame. */
11335 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11336 m->fs.sp_valid = true;
11338 ix86_compute_frame_layout (&frame);
11340 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11342 /* We should have already generated an error for any use of
11343 ms_hook on a nested function. */
11344 gcc_checking_assert (!ix86_static_chain_on_stack);
11346 /* Check if profiling is active and we shall use the profiling-before-prologue
11347 variant. If so, issue a sorry. */
11348 if (crtl->profile && flag_fentry != 0)
11349 sorry ("ms_hook_prologue attribute isn%'t compatible "
11350 "with -mfentry for 32-bit");
11352 /* In ix86_asm_output_function_label we emitted:
11353 8b ff movl.s %edi,%edi
11354 55 push %ebp
11355 8b ec movl.s %esp,%ebp
11357 This matches the hookable function prologue in Win32 API
11358 functions in Microsoft Windows XP Service Pack 2 and newer.
11359 Wine uses this to enable Windows apps to hook the Win32 API
11360 functions provided by Wine.
11362 What that means is that we've already set up the frame pointer. */
11364 if (frame_pointer_needed
11365 && !(crtl->drap_reg && crtl->stack_realign_needed))
11367 rtx push, mov;
11369 /* We've decided to use the frame pointer already set up.
11370 Describe this to the unwinder by pretending that both
11371 push and mov insns happen right here.
11373 Putting the unwind info here at the end of the ms_hook
11374 is done so that we can make absolutely certain we get
11375 the required byte sequence at the start of the function,
11376 rather than relying on an assembler that can produce
11377 the exact encoding required.
11379 However it does mean (in the unpatched case) that we have
11380 a 1 insn window where the asynchronous unwind info is
11381 incorrect. However, if we placed the unwind info at
11382 its correct location we would have incorrect unwind info
11383 in the patched case. Which is probably all moot since
11384 I don't expect Wine generates dwarf2 unwind info for the
11385 system libraries that use this feature. */
11387 insn = emit_insn (gen_blockage ());
11389 push = gen_push (hard_frame_pointer_rtx);
11390 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11391 stack_pointer_rtx);
11392 RTX_FRAME_RELATED_P (push) = 1;
11393 RTX_FRAME_RELATED_P (mov) = 1;
11395 RTX_FRAME_RELATED_P (insn) = 1;
11396 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11397 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11399 /* Note that gen_push incremented m->fs.cfa_offset, even
11400 though we didn't emit the push insn here. */
11401 m->fs.cfa_reg = hard_frame_pointer_rtx;
11402 m->fs.fp_offset = m->fs.cfa_offset;
11403 m->fs.fp_valid = true;
11405 else
11407 /* The frame pointer is not needed so pop %ebp again.
11408 This leaves us with a pristine state. */
11409 emit_insn (gen_pop (hard_frame_pointer_rtx));
11413 /* The first insn of a function that accepts its static chain on the
11414 stack is to push the register that would be filled in by a direct
11415 call. This insn will be skipped by the trampoline. */
11416 else if (ix86_static_chain_on_stack)
11418 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11419 emit_insn (gen_blockage ());
11421 /* We don't want to interpret this push insn as a register save,
11422 only as a stack adjustment. The real copy of the register as
11423 a save will be done later, if needed. */
11424 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11425 t = gen_rtx_SET (stack_pointer_rtx, t);
11426 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11427 RTX_FRAME_RELATED_P (insn) = 1;
11430 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11431 DRAP is needed and stack realignment is really needed after reload. */
11432 if (stack_realign_drap)
11434 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11436 /* Only need to push the parameter pointer reg if it is call-preserved. */
11437 if (!call_used_regs[REGNO (crtl->drap_reg)])
11439 /* Push arg pointer reg */
11440 insn = emit_insn (gen_push (crtl->drap_reg));
11441 RTX_FRAME_RELATED_P (insn) = 1;
11444 /* Grab the argument pointer. */
11445 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11446 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11447 RTX_FRAME_RELATED_P (insn) = 1;
11448 m->fs.cfa_reg = crtl->drap_reg;
11449 m->fs.cfa_offset = 0;
11451 /* Align the stack. */
11452 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11453 stack_pointer_rtx,
11454 GEN_INT (-align_bytes)));
11455 RTX_FRAME_RELATED_P (insn) = 1;
11457 /* Replicate the return address on the stack so that the return
11458 address can be reached via the (argp - 1) slot. This is needed
11459 to implement macro RETURN_ADDR_RTX and intrinsic function
11460 expand_builtin_return_addr etc. */
11461 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11462 t = gen_frame_mem (word_mode, t);
11463 insn = emit_insn (gen_push (t));
11464 RTX_FRAME_RELATED_P (insn) = 1;
11466 /* For the purposes of frame and register save area addressing,
11467 we've started over with a new frame. */
11468 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11469 m->fs.realigned = true;
11472 int_registers_saved = (frame.nregs == 0);
11473 sse_registers_saved = (frame.nsseregs == 0);
11475 if (frame_pointer_needed && !m->fs.fp_valid)
11477 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11478 slower on all targets. Also sdb doesn't like it. */
11479 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11480 RTX_FRAME_RELATED_P (insn) = 1;
11482 /* Push registers now, before setting the frame pointer
11483 on SEH target. */
11484 if (!int_registers_saved
11485 && TARGET_SEH
11486 && !frame.save_regs_using_mov)
11488 ix86_emit_save_regs ();
11489 int_registers_saved = true;
11490 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11493 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11495 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11496 RTX_FRAME_RELATED_P (insn) = 1;
11498 if (m->fs.cfa_reg == stack_pointer_rtx)
11499 m->fs.cfa_reg = hard_frame_pointer_rtx;
11500 m->fs.fp_offset = m->fs.sp_offset;
11501 m->fs.fp_valid = true;
11505 if (!int_registers_saved)
11507 /* If saving registers via PUSH, do so now. */
11508 if (!frame.save_regs_using_mov)
11510 ix86_emit_save_regs ();
11511 int_registers_saved = true;
11512 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11515 /* When using the red zone we may start saving registers before allocating
11516 the stack frame, saving one cycle of the prologue. However, avoid
11517 doing this if we have to probe the stack; at least on x86_64 the
11518 stack probe can turn into a call that clobbers a red zone location. */
11519 else if (ix86_using_red_zone ()
11520 && (! TARGET_STACK_PROBE
11521 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11523 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11524 int_registers_saved = true;
11528 if (stack_realign_fp)
11530 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11531 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11533 /* The computation of the size of the re-aligned stack frame means
11534 that we must allocate the size of the register save area before
11535 performing the actual alignment. Otherwise we cannot guarantee
11536 that there's enough storage above the realignment point. */
11537 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11538 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11539 GEN_INT (m->fs.sp_offset
11540 - frame.sse_reg_save_offset),
11541 -1, false);
11543 /* Align the stack. */
11544 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11545 stack_pointer_rtx,
11546 GEN_INT (-align_bytes)));
11548 /* For the purposes of register save area addressing, the stack
11549 pointer is no longer valid. As for the value of sp_offset,
11550 see ix86_compute_frame_layout, which we need to match in order
11551 to pass verification of stack_pointer_offset at the end. */
11552 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11553 m->fs.sp_valid = false;
11556 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11558 if (flag_stack_usage_info)
11560 /* We start to count from ARG_POINTER. */
11561 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11563 /* If it was realigned, take into account the fake frame. */
11564 if (stack_realign_drap)
11566 if (ix86_static_chain_on_stack)
11567 stack_size += UNITS_PER_WORD;
11569 if (!call_used_regs[REGNO (crtl->drap_reg)])
11570 stack_size += UNITS_PER_WORD;
11572 /* This over-estimates by 1 minimal-stack-alignment-unit but
11573 mitigates that by counting in the new return address slot. */
11574 current_function_dynamic_stack_size
11575 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11578 current_function_static_stack_size = stack_size;
11581 /* On SEH target with very large frame size, allocate an area to save
11582 SSE registers (as the very large allocation won't be described). */
11583 if (TARGET_SEH
11584 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11585 && !sse_registers_saved)
11587 HOST_WIDE_INT sse_size =
11588 frame.sse_reg_save_offset - frame.reg_save_offset;
11590 gcc_assert (int_registers_saved);
11592 /* No need to do stack checking as the area will be immediately
11593 written. */
11594 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11595 GEN_INT (-sse_size), -1,
11596 m->fs.cfa_reg == stack_pointer_rtx);
11597 allocate -= sse_size;
11598 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11599 sse_registers_saved = true;
11602 /* The stack has already been decremented by the instruction calling us
11603 so probe if the size is non-negative to preserve the protection area. */
11604 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11606 /* We expect the registers to be saved when probes are used. */
11607 gcc_assert (int_registers_saved);
11609 if (STACK_CHECK_MOVING_SP)
11611 if (!(crtl->is_leaf && !cfun->calls_alloca
11612 && allocate <= PROBE_INTERVAL))
11614 ix86_adjust_stack_and_probe (allocate);
11615 allocate = 0;
11618 else
11620 HOST_WIDE_INT size = allocate;
11622 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11623 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11625 if (TARGET_STACK_PROBE)
11627 if (crtl->is_leaf && !cfun->calls_alloca)
11629 if (size > PROBE_INTERVAL)
11630 ix86_emit_probe_stack_range (0, size);
11632 else
11633 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11635 else
11637 if (crtl->is_leaf && !cfun->calls_alloca)
11639 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11640 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11641 size - STACK_CHECK_PROTECT);
11643 else
11644 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11649 if (allocate == 0)
11651 else if (!ix86_target_stack_probe ()
11652 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11654 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11655 GEN_INT (-allocate), -1,
11656 m->fs.cfa_reg == stack_pointer_rtx);
11658 else
11660 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11661 rtx r10 = NULL;
11662 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11663 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11664 bool eax_live = ix86_eax_live_at_start_p ();
11665 bool r10_live = false;
11667 if (TARGET_64BIT)
11668 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11670 if (eax_live)
11672 insn = emit_insn (gen_push (eax));
11673 allocate -= UNITS_PER_WORD;
11674 /* Note that SEH directives need to continue tracking the stack
11675 pointer even after the frame pointer has been set up. */
11676 if (sp_is_cfa_reg || TARGET_SEH)
11678 if (sp_is_cfa_reg)
11679 m->fs.cfa_offset += UNITS_PER_WORD;
11680 RTX_FRAME_RELATED_P (insn) = 1;
11681 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11682 gen_rtx_SET (stack_pointer_rtx,
11683 plus_constant (Pmode, stack_pointer_rtx,
11684 -UNITS_PER_WORD)));
11688 if (r10_live)
11690 r10 = gen_rtx_REG (Pmode, R10_REG);
11691 insn = emit_insn (gen_push (r10));
11692 allocate -= UNITS_PER_WORD;
11693 if (sp_is_cfa_reg || TARGET_SEH)
11695 if (sp_is_cfa_reg)
11696 m->fs.cfa_offset += UNITS_PER_WORD;
11697 RTX_FRAME_RELATED_P (insn) = 1;
11698 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11699 gen_rtx_SET (stack_pointer_rtx,
11700 plus_constant (Pmode, stack_pointer_rtx,
11701 -UNITS_PER_WORD)));
11705 emit_move_insn (eax, GEN_INT (allocate));
11706 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11708 /* Use the fact that AX still contains ALLOCATE. */
11709 adjust_stack_insn = (Pmode == DImode
11710 ? gen_pro_epilogue_adjust_stack_di_sub
11711 : gen_pro_epilogue_adjust_stack_si_sub);
11713 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11714 stack_pointer_rtx, eax));
11716 if (sp_is_cfa_reg || TARGET_SEH)
11718 if (sp_is_cfa_reg)
11719 m->fs.cfa_offset += allocate;
11720 RTX_FRAME_RELATED_P (insn) = 1;
11721 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11722 gen_rtx_SET (stack_pointer_rtx,
11723 plus_constant (Pmode, stack_pointer_rtx,
11724 -allocate)));
11726 m->fs.sp_offset += allocate;
11728 /* Use stack_pointer_rtx for relative addressing so that code
11729 works for realigned stack, too. */
11730 if (r10_live && eax_live)
11732 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11733 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11734 gen_frame_mem (word_mode, t));
11735 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11736 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11737 gen_frame_mem (word_mode, t));
11739 else if (eax_live || r10_live)
11741 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11742 emit_move_insn (gen_rtx_REG (word_mode,
11743 (eax_live ? AX_REG : R10_REG)),
11744 gen_frame_mem (word_mode, t));
11747 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11749 /* If we haven't already set up the frame pointer, do so now. */
11750 if (frame_pointer_needed && !m->fs.fp_valid)
11752 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11753 GEN_INT (frame.stack_pointer_offset
11754 - frame.hard_frame_pointer_offset));
11755 insn = emit_insn (insn);
11756 RTX_FRAME_RELATED_P (insn) = 1;
11757 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11759 if (m->fs.cfa_reg == stack_pointer_rtx)
11760 m->fs.cfa_reg = hard_frame_pointer_rtx;
11761 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11762 m->fs.fp_valid = true;
11765 if (!int_registers_saved)
11766 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11767 if (!sse_registers_saved)
11768 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11770 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11771 in the prologue. */
11772 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11774 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11775 insn = emit_insn (gen_set_got (pic));
11776 RTX_FRAME_RELATED_P (insn) = 1;
11777 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11778 emit_insn (gen_prologue_use (pic));
11779 /* Delete an already emitted SET_GOT if it exists and is allocated to
11780 REAL_PIC_OFFSET_TABLE_REGNUM. */
11781 ix86_elim_entry_set_got (pic);
11784 if (crtl->drap_reg && !crtl->stack_realign_needed)
11786 /* vDRAP is set up, but after reload it turns out stack realignment
11787 isn't necessary; here we emit prologue code to set up DRAP
11788 without the stack realignment adjustment. */
11789 t = choose_baseaddr (0);
11790 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11793 /* Prevent instructions from being scheduled into register save push
11794 sequence when access to the redzone area is done through frame pointer.
11795 The offset between the frame pointer and the stack pointer is calculated
11796 relative to the value of the stack pointer at the end of the function
11797 prologue, and moving instructions that access redzone area via frame
11798 pointer inside push sequence violates this assumption. */
11799 if (frame_pointer_needed && frame.red_zone_size)
11800 emit_insn (gen_memory_blockage ());
11802 /* Emit cld instruction if stringops are used in the function. */
11803 if (TARGET_CLD && ix86_current_function_needs_cld)
11804 emit_insn (gen_cld ());
11806 /* SEH requires that the prologue end within 256 bytes of the start of
11807 the function. Prevent instruction schedules that would extend that.
11808 Further, prevent alloca modifications to the stack pointer from being
11809 combined with prologue modifications. */
11810 if (TARGET_SEH)
11811 emit_insn (gen_prologue_use (stack_pointer_rtx));
11814 /* Emit code to restore REG using a POP insn. */
11816 static void
11817 ix86_emit_restore_reg_using_pop (rtx reg)
11819 struct machine_function *m = cfun->machine;
11820 rtx_insn *insn = emit_insn (gen_pop (reg));
11822 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11823 m->fs.sp_offset -= UNITS_PER_WORD;
11825 if (m->fs.cfa_reg == crtl->drap_reg
11826 && REGNO (reg) == REGNO (crtl->drap_reg))
11828 /* Previously we'd represented the CFA as an expression
11829 like *(%ebp - 8). We've just popped that value from
11830 the stack, which means we need to reset the CFA to
11831 the drap register. This will remain until we restore
11832 the stack pointer. */
11833 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11834 RTX_FRAME_RELATED_P (insn) = 1;
11836 /* This means that the DRAP register is valid for addressing too. */
11837 m->fs.drap_valid = true;
11838 return;
11841 if (m->fs.cfa_reg == stack_pointer_rtx)
11843 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11844 x = gen_rtx_SET (stack_pointer_rtx, x);
11845 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11846 RTX_FRAME_RELATED_P (insn) = 1;
11848 m->fs.cfa_offset -= UNITS_PER_WORD;
11851 /* When the frame pointer is the CFA, and we pop it, we are
11852 swapping back to the stack pointer as the CFA. This happens
11853 for stack frames that don't allocate other data, so we assume
11854 the stack pointer is now pointing at the return address, i.e.
11855 the function entry state, which makes the offset be 1 word. */
11856 if (reg == hard_frame_pointer_rtx)
11858 m->fs.fp_valid = false;
11859 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11861 m->fs.cfa_reg = stack_pointer_rtx;
11862 m->fs.cfa_offset -= UNITS_PER_WORD;
11864 add_reg_note (insn, REG_CFA_DEF_CFA,
11865 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11866 GEN_INT (m->fs.cfa_offset)));
11867 RTX_FRAME_RELATED_P (insn) = 1;
11872 /* Emit code to restore saved registers using POP insns. */
11874 static void
11875 ix86_emit_restore_regs_using_pop (void)
11877 unsigned int regno;
11879 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11880 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11881 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11884 /* Emit code and notes for the LEAVE instruction. */
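/* leave is equivalent to  mov %ebp, %esp  followed by  pop %ebp,  which is
   why SP becomes valid again at FP_OFFSET - UNITS_PER_WORD and FP becomes
   invalid below. */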
11886 static void
11887 ix86_emit_leave (void)
11889 struct machine_function *m = cfun->machine;
11890 rtx_insn *insn = emit_insn (ix86_gen_leave ());
11892 ix86_add_queued_cfa_restore_notes (insn);
11894 gcc_assert (m->fs.fp_valid);
11895 m->fs.sp_valid = true;
11896 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11897 m->fs.fp_valid = false;
11899 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11901 m->fs.cfa_reg = stack_pointer_rtx;
11902 m->fs.cfa_offset = m->fs.sp_offset;
11904 add_reg_note (insn, REG_CFA_DEF_CFA,
11905 plus_constant (Pmode, stack_pointer_rtx,
11906 m->fs.sp_offset));
11907 RTX_FRAME_RELATED_P (insn) = 1;
11909 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11910 m->fs.fp_offset);
11913 /* Emit code to restore saved registers using MOV insns.
11914 First register is restored from CFA - CFA_OFFSET. */
11915 static void
11916 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11917 bool maybe_eh_return)
11919 struct machine_function *m = cfun->machine;
11920 unsigned int regno;
11922 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11923 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11925 rtx reg = gen_rtx_REG (word_mode, regno);
11926 rtx mem;
11927 rtx_insn *insn;
11929 mem = choose_baseaddr (cfa_offset);
11930 mem = gen_frame_mem (word_mode, mem);
11931 insn = emit_move_insn (reg, mem);
11933 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11935 /* Previously we'd represented the CFA as an expression
11936 like *(%ebp - 8). We've just reloaded that value from
11937 the stack, which means we need to reset the CFA to
11938 the drap register. This will remain until we restore
11939 the stack pointer. */
11940 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11941 RTX_FRAME_RELATED_P (insn) = 1;
11943 /* This means that the DRAP register is valid for addressing. */
11944 m->fs.drap_valid = true;
11946 else
11947 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11949 cfa_offset -= UNITS_PER_WORD;
11953 /* Emit code to restore saved SSE registers using MOV insns.
11954 First register is restored from CFA - CFA_OFFSET. */
11955 static void
11956 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11957 bool maybe_eh_return)
11959 unsigned int regno;
11961 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11962 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11964 rtx reg = gen_rtx_REG (V4SFmode, regno);
11965 rtx mem;
11967 mem = choose_baseaddr (cfa_offset);
11968 mem = gen_rtx_MEM (V4SFmode, mem);
11969 set_mem_align (mem, 128);
11970 emit_move_insn (reg, mem);
11972 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
11974 cfa_offset -= 16;
11978 /* Restore function stack, frame, and registers. */
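/* Within this function, STYLE == 0 denotes a sibcall epilogue (no return
   instruction is emitted), STYLE == 2 the eh_return path, and other values
   an ordinary return. */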
11980 void
11981 ix86_expand_epilogue (int style)
11983 struct machine_function *m = cfun->machine;
11984 struct machine_frame_state frame_state_save = m->fs;
11985 struct ix86_frame frame;
11986 bool restore_regs_via_mov;
11987 bool using_drap;
11989 ix86_finalize_stack_realign_flags ();
11990 ix86_compute_frame_layout (&frame);
11992 m->fs.sp_valid = (!frame_pointer_needed
11993 || (crtl->sp_is_unchanging
11994 && !stack_realign_fp));
11995 gcc_assert (!m->fs.sp_valid
11996 || m->fs.sp_offset == frame.stack_pointer_offset);
11998 /* The FP must be valid if the frame pointer is present. */
11999 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12000 gcc_assert (!m->fs.fp_valid
12001 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12003 /* We must have *some* valid pointer to the stack frame. */
12004 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12006 /* The DRAP is never valid at this point. */
12007 gcc_assert (!m->fs.drap_valid);
12009 /* See the comment about red zone and frame
12010 pointer usage in ix86_expand_prologue. */
12011 if (frame_pointer_needed && frame.red_zone_size)
12012 emit_insn (gen_memory_blockage ());
12014 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12015 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12017 /* Determine the CFA offset of the end of the red-zone. */
12018 m->fs.red_zone_offset = 0;
12019 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12021 /* The red-zone begins below the return address. */
12022 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12024 /* When the register save area is in the aligned portion of
12025 the stack, determine the maximum runtime displacement that
12026 matches up with the aligned frame. */
12027 if (stack_realign_drap)
12028 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12029 + UNITS_PER_WORD);
12032 /* Special care must be taken for the normal return case of a function
12033 using eh_return: the eax and edx registers are marked as saved, but
12034 not restored along this path. Adjust the save location to match. */
12035 if (crtl->calls_eh_return && style != 2)
12036 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12038 /* EH_RETURN requires the use of moves to function properly. */
12039 if (crtl->calls_eh_return)
12040 restore_regs_via_mov = true;
12041 /* SEH requires the use of pops to identify the epilogue. */
12042 else if (TARGET_SEH)
12043 restore_regs_via_mov = false;
12044 /* If we're only restoring one register and sp is not valid, then
12045 use a move instruction to restore the register, since it's
12046 less work than reloading sp and popping the register. */
12047 else if (!m->fs.sp_valid && frame.nregs <= 1)
12048 restore_regs_via_mov = true;
12049 else if (TARGET_EPILOGUE_USING_MOVE
12050 && cfun->machine->use_fast_prologue_epilogue
12051 && (frame.nregs > 1
12052 || m->fs.sp_offset != frame.reg_save_offset))
12053 restore_regs_via_mov = true;
12054 else if (frame_pointer_needed
12055 && !frame.nregs
12056 && m->fs.sp_offset != frame.reg_save_offset)
12057 restore_regs_via_mov = true;
12058 else if (frame_pointer_needed
12059 && TARGET_USE_LEAVE
12060 && cfun->machine->use_fast_prologue_epilogue
12061 && frame.nregs == 1)
12062 restore_regs_via_mov = true;
12063 else
12064 restore_regs_via_mov = false;
12066 if (restore_regs_via_mov || frame.nsseregs)
12068 /* Ensure that the entire register save area is addressable via
12069 the stack pointer, if we will restore via sp. */
12070 if (TARGET_64BIT
12071 && m->fs.sp_offset > 0x7fffffff
12072 && !(m->fs.fp_valid || m->fs.drap_valid)
12073 && (frame.nsseregs + frame.nregs) != 0)
12075 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12076 GEN_INT (m->fs.sp_offset
12077 - frame.sse_reg_save_offset),
12078 style,
12079 m->fs.cfa_reg == stack_pointer_rtx);
12083 /* If there are any SSE registers to restore, then we have to do it
12084 via moves, since there's obviously no pop for SSE regs. */
12085 if (frame.nsseregs)
12086 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12087 style == 2);
12089 if (restore_regs_via_mov)
12091 rtx t;
12093 if (frame.nregs)
12094 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12096 /* eh_return epilogues need %ecx added to the stack pointer. */
12097 if (style == 2)
12099 rtx sa = EH_RETURN_STACKADJ_RTX;
12100 rtx_insn *insn;
12102 /* Stack align doesn't work with eh_return. */
12103 gcc_assert (!stack_realign_drap);
12104 /* Neither do regparm nested functions. */
12105 gcc_assert (!ix86_static_chain_on_stack);
12107 if (frame_pointer_needed)
12109 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12110 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12111 emit_insn (gen_rtx_SET (sa, t));
12113 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12114 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12116 /* Note that we use SA as a temporary CFA, as the return
12117 address is at the proper place relative to it. We
12118 pretend this happens at the FP restore insn because
12119 prior to this insn the FP would be stored at the wrong
12120 offset relative to SA, and after this insn we have no
12121 other reasonable register to use for the CFA. We don't
12122 bother resetting the CFA to the SP for the duration of
12123 the return insn. */
12124 add_reg_note (insn, REG_CFA_DEF_CFA,
12125 plus_constant (Pmode, sa, UNITS_PER_WORD));
12126 ix86_add_queued_cfa_restore_notes (insn);
12127 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12128 RTX_FRAME_RELATED_P (insn) = 1;
12130 m->fs.cfa_reg = sa;
12131 m->fs.cfa_offset = UNITS_PER_WORD;
12132 m->fs.fp_valid = false;
12134 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12135 const0_rtx, style, false);
12137 else
12139 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12140 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12141 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12142 ix86_add_queued_cfa_restore_notes (insn);
12144 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12145 if (m->fs.cfa_offset != UNITS_PER_WORD)
12147 m->fs.cfa_offset = UNITS_PER_WORD;
12148 add_reg_note (insn, REG_CFA_DEF_CFA,
12149 plus_constant (Pmode, stack_pointer_rtx,
12150 UNITS_PER_WORD));
12151 RTX_FRAME_RELATED_P (insn) = 1;
12154 m->fs.sp_offset = UNITS_PER_WORD;
12155 m->fs.sp_valid = true;
12158 else
12160 /* SEH requires that the function end with (1) a stack adjustment
12161 if necessary, (2) a sequence of pops, and (3) a return or
12162 jump instruction. Prevent insns from the function body from
12163 being scheduled into this sequence. */
12164 if (TARGET_SEH)
12166 /* Prevent a catch region from being adjacent to the standard
12167 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12168 several other flags that would be interesting to test are
12169 set up yet. */
12170 if (flag_non_call_exceptions)
12171 emit_insn (gen_nops (const1_rtx));
12172 else
12173 emit_insn (gen_blockage ());
12176 /* First step is to deallocate the stack frame so that we can
12177 pop the registers. Also do it on SEH target for very large
12178 frame as the emitted instructions aren't allowed by the ABI in
12179 epilogues. */
12180 if (!m->fs.sp_valid
12181 || (TARGET_SEH
12182 && (m->fs.sp_offset - frame.reg_save_offset
12183 >= SEH_MAX_FRAME_SIZE)))
12185 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12186 GEN_INT (m->fs.fp_offset
12187 - frame.reg_save_offset),
12188 style, false);
12190 else if (m->fs.sp_offset != frame.reg_save_offset)
12192 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12193 GEN_INT (m->fs.sp_offset
12194 - frame.reg_save_offset),
12195 style,
12196 m->fs.cfa_reg == stack_pointer_rtx);
12199 ix86_emit_restore_regs_using_pop ();
12202 /* If we used a frame pointer and haven't already got rid of it,
12203 then do so now. */
12204 if (m->fs.fp_valid)
12206 /* If the stack pointer is valid and pointing at the frame
12207 pointer store address, then we only need a pop. */
12208 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12209 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12210 /* Leave results in shorter dependency chains on CPUs that are
12211 able to grok it fast. */
12212 else if (TARGET_USE_LEAVE
12213 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12214 || !cfun->machine->use_fast_prologue_epilogue)
12215 ix86_emit_leave ();
12216 else
12218 pro_epilogue_adjust_stack (stack_pointer_rtx,
12219 hard_frame_pointer_rtx,
12220 const0_rtx, style, !using_drap);
12221 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12225 if (using_drap)
12227 int param_ptr_offset = UNITS_PER_WORD;
12228 rtx_insn *insn;
12230 gcc_assert (stack_realign_drap);
12232 if (ix86_static_chain_on_stack)
12233 param_ptr_offset += UNITS_PER_WORD;
12234 if (!call_used_regs[REGNO (crtl->drap_reg)])
12235 param_ptr_offset += UNITS_PER_WORD;
12237 insn = emit_insn (gen_rtx_SET
12238 (stack_pointer_rtx,
12239 gen_rtx_PLUS (Pmode,
12240 crtl->drap_reg,
12241 GEN_INT (-param_ptr_offset))));
12242 m->fs.cfa_reg = stack_pointer_rtx;
12243 m->fs.cfa_offset = param_ptr_offset;
12244 m->fs.sp_offset = param_ptr_offset;
12245 m->fs.realigned = false;
12247 add_reg_note (insn, REG_CFA_DEF_CFA,
12248 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12249 GEN_INT (param_ptr_offset)));
12250 RTX_FRAME_RELATED_P (insn) = 1;
12252 if (!call_used_regs[REGNO (crtl->drap_reg)])
12253 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12256 /* At this point the stack pointer must be valid, and we must have
12257 restored all of the registers. We may not have deallocated the
12258 entire stack frame. We've delayed this until now because it may
12259 be possible to merge the local stack deallocation with the
12260 deallocation forced by ix86_static_chain_on_stack. */
12261 gcc_assert (m->fs.sp_valid);
12262 gcc_assert (!m->fs.fp_valid);
12263 gcc_assert (!m->fs.realigned);
12264 if (m->fs.sp_offset != UNITS_PER_WORD)
12266 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12267 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12268 style, true);
12270 else
12271 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12273 /* Sibcall epilogues don't want a return instruction. */
12274 if (style == 0)
12276 m->fs = frame_state_save;
12277 return;
12280 if (crtl->args.pops_args && crtl->args.size)
12282 rtx popc = GEN_INT (crtl->args.pops_args);
12284 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12285 address, do an explicit add, and jump indirectly to the caller. */
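/* Editorial sketch (not in the original source): for the >= 64K case the
   code below ends the function with roughly this sequence, assuming the
   usual AT&T syntax and that %ecx is free, as it is here:
       pop  %ecx          ; fetch the return address
       add  $N, %esp      ; N = crtl->args.pops_args bytes of arguments
       jmp  *%ecx         ; return to the caller
   The CFA notes attached below keep the unwind info consistent with it.  */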
12287 if (crtl->args.pops_args >= 65536)
12289 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12290 rtx_insn *insn;
12292 /* There is no "pascal" calling convention in any 64bit ABI. */
12293 gcc_assert (!TARGET_64BIT);
12295 insn = emit_insn (gen_pop (ecx));
12296 m->fs.cfa_offset -= UNITS_PER_WORD;
12297 m->fs.sp_offset -= UNITS_PER_WORD;
12299 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12300 x = gen_rtx_SET (stack_pointer_rtx, x);
12301 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12302 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12303 RTX_FRAME_RELATED_P (insn) = 1;
12305 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12306 popc, -1, true);
12307 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12309 else
12310 emit_jump_insn (gen_simple_return_pop_internal (popc));
12312 else
12313 emit_jump_insn (gen_simple_return_internal ());
12315 /* Restore the state back to the state from the prologue,
12316 so that it's correct for the next epilogue. */
12317 m->fs = frame_state_save;
12320 /* Reset from the function's potential modifications. */
12322 static void
12323 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12325 if (pic_offset_table_rtx
12326 && !ix86_use_pseudo_pic_reg ())
12327 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12328 #if TARGET_MACHO
12329 /* Mach-O doesn't support labels at the end of objects, so if
12330 it looks like we might want one, insert a NOP. */
12332 rtx_insn *insn = get_last_insn ();
12333 rtx_insn *deleted_debug_label = NULL;
12334 while (insn
12335 && NOTE_P (insn)
12336 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12338 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12339 notes only, instead set their CODE_LABEL_NUMBER to -1,
12340 otherwise there would be code generation differences
12341 in between -g and -g0. */
12342 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12343 deleted_debug_label = insn;
12344 insn = PREV_INSN (insn);
12346 if (insn
12347 && (LABEL_P (insn)
12348 || (NOTE_P (insn)
12349 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12350 fputs ("\tnop\n", file);
12351 else if (deleted_debug_label)
12352 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12353 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12354 CODE_LABEL_NUMBER (insn) = -1;
12356 #endif
12360 /* Return a scratch register to use in the split stack prologue. The
12361 split stack prologue is used for -fsplit-stack. It consists of the first
12362 instructions in the function, emitted even before the regular prologue.
12363 The scratch register can be any caller-saved register which is not
12364 used for parameters or for the static chain. */
12366 static unsigned int
12367 split_stack_prologue_scratch_regno (void)
12369 if (TARGET_64BIT)
12370 return R11_REG;
12371 else
12373 bool is_fastcall, is_thiscall;
12374 int regparm;
12376 is_fastcall = (lookup_attribute ("fastcall",
12377 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12378 != NULL);
12379 is_thiscall = (lookup_attribute ("thiscall",
12380 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12381 != NULL);
12382 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12384 if (is_fastcall)
12386 if (DECL_STATIC_CHAIN (cfun->decl))
12388 sorry ("-fsplit-stack does not support fastcall with "
12389 "nested function");
12390 return INVALID_REGNUM;
12392 return AX_REG;
12394 else if (is_thiscall)
12396 if (!DECL_STATIC_CHAIN (cfun->decl))
12397 return DX_REG;
12398 return AX_REG;
12400 else if (regparm < 3)
12402 if (!DECL_STATIC_CHAIN (cfun->decl))
12403 return CX_REG;
12404 else
12406 if (regparm >= 2)
12408 sorry ("-fsplit-stack does not support 2 register "
12409 "parameters for a nested function");
12410 return INVALID_REGNUM;
12412 return DX_REG;
12415 else
12417 /* FIXME: We could make this work by pushing a register
12418 around the addition and comparison. */
12419 sorry ("-fsplit-stack does not support 3 register parameters");
12420 return INVALID_REGNUM;
12425 /* A SYMBOL_REF for the function which allocates new stack space for
12426 -fsplit-stack. */
12428 static GTY(()) rtx split_stack_fn;
12430 /* A SYMBOL_REF for the function which allocates more stack space when
12431 using the large model. */
12433 static GTY(()) rtx split_stack_fn_large;
12435 /* Handle -fsplit-stack. These are the first instructions in the
12436 function, even before the regular prologue. */
12438 void
12439 ix86_expand_split_stack_prologue (void)
12441 struct ix86_frame frame;
12442 HOST_WIDE_INT allocate;
12443 unsigned HOST_WIDE_INT args_size;
12444 rtx_code_label *label;
12445 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12446 rtx scratch_reg = NULL_RTX;
12447 rtx_code_label *varargs_label = NULL;
12448 rtx fn;
12450 gcc_assert (flag_split_stack && reload_completed);
12452 ix86_finalize_stack_realign_flags ();
12453 ix86_compute_frame_layout (&frame);
12454 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12456 /* This is the label we will branch to if we have enough stack
12457 space. We expect the basic block reordering pass to reverse this
12458 branch if optimizing, so that we branch in the unlikely case. */
12459 label = gen_label_rtx ();
12461 /* We need to compare the stack pointer minus the frame size with
12462 the stack boundary in the TCB. The stack boundary always gives
12463 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12464 can compare directly. Otherwise we need to do an addition. */
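/* Editorial sketch (an assumption, mirroring the RTL built below): for a
   large frame the emitted check is roughly
       lea  -allocate(%rsp), %r11      ; or another scratch register
       cmp  <TCB stack boundary>, %r11 ; boundary read via UNSPEC_STACK_CHECK
       jae  .Lenough_stack
   while for a small frame the stack pointer itself is compared against the
   boundary.  */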
12466 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12467 UNSPEC_STACK_CHECK);
12468 limit = gen_rtx_CONST (Pmode, limit);
12469 limit = gen_rtx_MEM (Pmode, limit);
12470 if (allocate < SPLIT_STACK_AVAILABLE)
12471 current = stack_pointer_rtx;
12472 else
12474 unsigned int scratch_regno;
12475 rtx offset;
12477 /* We need a scratch register to hold the stack pointer minus
12478 the required frame size. Since this is the very start of the
12479 function, the scratch register can be any caller-saved
12480 register which is not used for parameters. */
12481 offset = GEN_INT (- allocate);
12482 scratch_regno = split_stack_prologue_scratch_regno ();
12483 if (scratch_regno == INVALID_REGNUM)
12484 return;
12485 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12486 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12488 /* We don't use ix86_gen_add3 in this case because it will
12489 want to split to lea, but when not optimizing the insn
12490 will not be split after this point. */
12491 emit_insn (gen_rtx_SET (scratch_reg,
12492 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12493 offset)));
12495 else
12497 emit_move_insn (scratch_reg, offset);
12498 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12499 stack_pointer_rtx));
12501 current = scratch_reg;
12504 ix86_expand_branch (GEU, current, limit, label);
12505 jump_insn = get_last_insn ();
12506 JUMP_LABEL (jump_insn) = label;
12508 /* Mark the jump as very likely to be taken. */
12509 add_int_reg_note (jump_insn, REG_BR_PROB,
12510 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12512 if (split_stack_fn == NULL_RTX)
12514 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12515 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12517 fn = split_stack_fn;
12519 /* Get more stack space. We pass in the desired stack space and the
12520 size of the arguments to copy to the new stack. In 32-bit mode
12521 we push the parameters; __morestack will return on a new stack
12522 anyhow. In 64-bit mode we pass the parameters in r10 and
12523 r11. */
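/* Editorial sketch (assumed equivalents of the moves/pushes below): in
   64-bit mode this amounts to
       mov  $allocate, %r10
       mov  $args_size, %r11
       call __morestack
   and in 32-bit mode to pushing args_size and then allocate before the
   call.  */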
12524 allocate_rtx = GEN_INT (allocate);
12525 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12526 call_fusage = NULL_RTX;
12527 if (TARGET_64BIT)
12529 rtx reg10, reg11;
12531 reg10 = gen_rtx_REG (Pmode, R10_REG);
12532 reg11 = gen_rtx_REG (Pmode, R11_REG);
12534 /* If this function uses a static chain, it will be in %r10.
12535 Preserve it across the call to __morestack. */
12536 if (DECL_STATIC_CHAIN (cfun->decl))
12538 rtx rax;
12540 rax = gen_rtx_REG (word_mode, AX_REG);
12541 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12542 use_reg (&call_fusage, rax);
12545 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12546 && !TARGET_PECOFF)
12548 HOST_WIDE_INT argval;
12550 gcc_assert (Pmode == DImode);
12551 /* When using the large model we need to load the address
12552 into a register, and we've run out of registers. So we
12553 switch to a different calling convention, and we call a
12554 different function: __morestack_large_model. We pass the
12555 argument size in the upper 32 bits of r10 and pass the
12556 frame size in the lower 32 bits. */
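/* Editorial worked example (values are illustrative only): with
   allocate == 0x120 and args_size == 0x10, the packing below yields
   argval == (0x10 << 32) + 0x120 == 0x0000001000000120, which is then
   loaded into %r10 for __morestack_large_model.  */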
12557 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12558 gcc_assert ((args_size & 0xffffffff) == args_size);
12560 if (split_stack_fn_large == NULL_RTX)
12562 split_stack_fn_large =
12563 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12564 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12566 if (ix86_cmodel == CM_LARGE_PIC)
12568 rtx_code_label *label;
12569 rtx x;
12571 label = gen_label_rtx ();
12572 emit_label (label);
12573 LABEL_PRESERVE_P (label) = 1;
12574 emit_insn (gen_set_rip_rex64 (reg10, label));
12575 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12576 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12577 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12578 UNSPEC_GOT);
12579 x = gen_rtx_CONST (Pmode, x);
12580 emit_move_insn (reg11, x);
12581 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12582 x = gen_const_mem (Pmode, x);
12583 emit_move_insn (reg11, x);
12585 else
12586 emit_move_insn (reg11, split_stack_fn_large);
12588 fn = reg11;
12590 argval = ((args_size << 16) << 16) + allocate;
12591 emit_move_insn (reg10, GEN_INT (argval));
12593 else
12595 emit_move_insn (reg10, allocate_rtx);
12596 emit_move_insn (reg11, GEN_INT (args_size));
12597 use_reg (&call_fusage, reg11);
12600 use_reg (&call_fusage, reg10);
12602 else
12604 emit_insn (gen_push (GEN_INT (args_size)));
12605 emit_insn (gen_push (allocate_rtx));
12607 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12608 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12609 NULL_RTX, false);
12610 add_function_usage_to (call_insn, call_fusage);
12612 /* In order to make call/return prediction work right, we now need
12613 to execute a return instruction. See
12614 libgcc/config/i386/morestack.S for the details on how this works.
12616 For flow purposes gcc must not see this as a return
12617 instruction--we need control flow to continue at the subsequent
12618 label. Therefore, we use an unspec. */
12619 gcc_assert (crtl->args.pops_args < 65536);
12620 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12622 /* If we are in 64-bit mode and this function uses a static chain,
12623 we saved %r10 in %rax before calling __morestack. */
12624 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12625 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12626 gen_rtx_REG (word_mode, AX_REG));
12628 /* If this function calls va_start, we need to store a pointer to
12629 the arguments on the old stack, because they may not all have been
12630 copied to the new stack. At this point the old stack can be
12631 found at the frame pointer value used by __morestack, because
12632 __morestack has set that up before calling back to us. Here we
12633 store that pointer in a scratch register, and in
12634 ix86_expand_prologue we store the scratch register in a stack
12635 slot. */
12636 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12638 unsigned int scratch_regno;
12639 rtx frame_reg;
12640 int words;
12642 scratch_regno = split_stack_prologue_scratch_regno ();
12643 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12644 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12646 /* 64-bit:
12647 fp -> old fp value
12648 return address within this function
12649 return address of caller of this function
12650 stack arguments
12651 So we add three words to get to the stack arguments.
12653 32-bit:
12654 fp -> old fp value
12655 return address within this function
12656 first argument to __morestack
12657 second argument to __morestack
12658 return address of caller of this function
12659 stack arguments
12660 So we add five words to get to the stack arguments.
12662 words = TARGET_64BIT ? 3 : 5;
12663 emit_insn (gen_rtx_SET (scratch_reg,
12664 gen_rtx_PLUS (Pmode, frame_reg,
12665 GEN_INT (words * UNITS_PER_WORD))));
12667 varargs_label = gen_label_rtx ();
12668 emit_jump_insn (gen_jump (varargs_label));
12669 JUMP_LABEL (get_last_insn ()) = varargs_label;
12671 emit_barrier ();
12674 emit_label (label);
12675 LABEL_NUSES (label) = 1;
12677 /* If this function calls va_start, we now have to set the scratch
12678 register for the case where we do not call __morestack. In this
12679 case we need to set it based on the stack pointer. */
12680 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12682 emit_insn (gen_rtx_SET (scratch_reg,
12683 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12684 GEN_INT (UNITS_PER_WORD))));
12686 emit_label (varargs_label);
12687 LABEL_NUSES (varargs_label) = 1;
12691 /* We may have to tell the dataflow pass that the split stack prologue
12692 is initializing a scratch register. */
12694 static void
12695 ix86_live_on_entry (bitmap regs)
12697 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12699 gcc_assert (flag_split_stack);
12700 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12704 /* Extract the parts of an RTL expression that is a valid memory address
12705 for an instruction. Return 0 if the structure of the address is
12706 grossly off. Return -1 if the address contains ASHIFT, so it is not
12707 strictly valid, but still used for computing length of lea instruction. */
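/* Editorial illustration (register choices are arbitrary): an address such
   as the RTL for 16(%ebx,%esi,4),
       (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
                         (reg:SI %ebx))
                (const_int 16))
   decomposes into base = %ebx, index = %esi, scale = 4, disp = 16,
   seg = SEG_DEFAULT, with a return value of 1.  */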
12709 int
12710 ix86_decompose_address (rtx addr, struct ix86_address *out)
12712 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12713 rtx base_reg, index_reg;
12714 HOST_WIDE_INT scale = 1;
12715 rtx scale_rtx = NULL_RTX;
12716 rtx tmp;
12717 int retval = 1;
12718 enum ix86_address_seg seg = SEG_DEFAULT;
12720 /* Allow zero-extended SImode addresses;
12721 they will be emitted with the addr32 prefix. */
12722 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12724 if (GET_CODE (addr) == ZERO_EXTEND
12725 && GET_MODE (XEXP (addr, 0)) == SImode)
12727 addr = XEXP (addr, 0);
12728 if (CONST_INT_P (addr))
12729 return 0;
12731 else if (GET_CODE (addr) == AND
12732 && const_32bit_mask (XEXP (addr, 1), DImode))
12734 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12735 if (addr == NULL_RTX)
12736 return 0;
12738 if (CONST_INT_P (addr))
12739 return 0;
12743 /* Allow SImode subregs of DImode addresses;
12744 they will be emitted with the addr32 prefix. */
12745 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12747 if (GET_CODE (addr) == SUBREG
12748 && GET_MODE (SUBREG_REG (addr)) == DImode)
12750 addr = SUBREG_REG (addr);
12751 if (CONST_INT_P (addr))
12752 return 0;
12756 if (REG_P (addr))
12757 base = addr;
12758 else if (GET_CODE (addr) == SUBREG)
12760 if (REG_P (SUBREG_REG (addr)))
12761 base = addr;
12762 else
12763 return 0;
12765 else if (GET_CODE (addr) == PLUS)
12767 rtx addends[4], op;
12768 int n = 0, i;
12770 op = addr;
12771 do
12773 if (n >= 4)
12774 return 0;
12775 addends[n++] = XEXP (op, 1);
12776 op = XEXP (op, 0);
12778 while (GET_CODE (op) == PLUS);
12779 if (n >= 4)
12780 return 0;
12781 addends[n] = op;
12783 for (i = n; i >= 0; --i)
12785 op = addends[i];
12786 switch (GET_CODE (op))
12788 case MULT:
12789 if (index)
12790 return 0;
12791 index = XEXP (op, 0);
12792 scale_rtx = XEXP (op, 1);
12793 break;
12795 case ASHIFT:
12796 if (index)
12797 return 0;
12798 index = XEXP (op, 0);
12799 tmp = XEXP (op, 1);
12800 if (!CONST_INT_P (tmp))
12801 return 0;
12802 scale = INTVAL (tmp);
12803 if ((unsigned HOST_WIDE_INT) scale > 3)
12804 return 0;
12805 scale = 1 << scale;
12806 break;
12808 case ZERO_EXTEND:
12809 op = XEXP (op, 0);
12810 if (GET_CODE (op) != UNSPEC)
12811 return 0;
12812 /* FALLTHRU */
12814 case UNSPEC:
12815 if (XINT (op, 1) == UNSPEC_TP
12816 && TARGET_TLS_DIRECT_SEG_REFS
12817 && seg == SEG_DEFAULT)
12818 seg = DEFAULT_TLS_SEG_REG;
12819 else
12820 return 0;
12821 break;
12823 case SUBREG:
12824 if (!REG_P (SUBREG_REG (op)))
12825 return 0;
12826 /* FALLTHRU */
12828 case REG:
12829 if (!base)
12830 base = op;
12831 else if (!index)
12832 index = op;
12833 else
12834 return 0;
12835 break;
12837 case CONST:
12838 case CONST_INT:
12839 case SYMBOL_REF:
12840 case LABEL_REF:
12841 if (disp)
12842 return 0;
12843 disp = op;
12844 break;
12846 default:
12847 return 0;
12851 else if (GET_CODE (addr) == MULT)
12853 index = XEXP (addr, 0); /* index*scale */
12854 scale_rtx = XEXP (addr, 1);
12856 else if (GET_CODE (addr) == ASHIFT)
12858 /* We're called for lea too, which implements ashift on occasion. */
12859 index = XEXP (addr, 0);
12860 tmp = XEXP (addr, 1);
12861 if (!CONST_INT_P (tmp))
12862 return 0;
12863 scale = INTVAL (tmp);
12864 if ((unsigned HOST_WIDE_INT) scale > 3)
12865 return 0;
12866 scale = 1 << scale;
12867 retval = -1;
12869 else
12870 disp = addr; /* displacement */
12872 if (index)
12874 if (REG_P (index))
12876 else if (GET_CODE (index) == SUBREG
12877 && REG_P (SUBREG_REG (index)))
12879 else
12880 return 0;
12883 /* Extract the integral value of scale. */
12884 if (scale_rtx)
12886 if (!CONST_INT_P (scale_rtx))
12887 return 0;
12888 scale = INTVAL (scale_rtx);
12891 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12892 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12894 /* Avoid useless 0 displacement. */
12895 if (disp == const0_rtx && (base || index))
12896 disp = NULL_RTX;
12898 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12899 if (base_reg && index_reg && scale == 1
12900 && (index_reg == arg_pointer_rtx
12901 || index_reg == frame_pointer_rtx
12902 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12904 std::swap (base, index);
12905 std::swap (base_reg, index_reg);
12908 /* Special case: %ebp cannot be encoded as a base without a displacement.
12909 Similarly %r13. */
12910 if (!disp
12911 && base_reg
12912 && (base_reg == hard_frame_pointer_rtx
12913 || base_reg == frame_pointer_rtx
12914 || base_reg == arg_pointer_rtx
12915 || (REG_P (base_reg)
12916 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12917 || REGNO (base_reg) == R13_REG))))
12918 disp = const0_rtx;
12920 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12921 Avoid this by transforming it to [%esi+0].
12922 Reload calls address legitimization without cfun defined, so we need
12923 to test cfun for being non-NULL. */
12924 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12925 && base_reg && !index_reg && !disp
12926 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12927 disp = const0_rtx;
12929 /* Special case: encode reg+reg instead of reg*2. */
12930 if (!base && index && scale == 2)
12931 base = index, base_reg = index_reg, scale = 1;
12933 /* Special case: scaling cannot be encoded without base or displacement. */
12934 if (!base && !disp && index && scale != 1)
12935 disp = const0_rtx;
12937 out->base = base;
12938 out->index = index;
12939 out->disp = disp;
12940 out->scale = scale;
12941 out->seg = seg;
12943 return retval;
12946 /* Return cost of the memory address x.
12947 For i386, it is better to use a complex address than let gcc copy
12948 the address into a reg and make a new pseudo. But not if the address
12949 requires two regs - that would mean more pseudos with longer
12950 lifetimes. */
12951 static int
12952 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12954 struct ix86_address parts;
12955 int cost = 1;
12956 int ok = ix86_decompose_address (x, &parts);
12958 gcc_assert (ok);
12960 if (parts.base && GET_CODE (parts.base) == SUBREG)
12961 parts.base = SUBREG_REG (parts.base);
12962 if (parts.index && GET_CODE (parts.index) == SUBREG)
12963 parts.index = SUBREG_REG (parts.index);
12965 /* Attempt to minimize the number of registers in the address by increasing
12966 the address cost for each register used. We don't increase the address cost
12967 for "pic_offset_table_rtx". When a memop with "pic_offset_table_rtx"
12968 is not invariant itself, it most likely means that the base or index is not
12969 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
12970 which is not profitable for x86. */
12971 if (parts.base
12972 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12973 && (current_pass->type == GIMPLE_PASS
12974 || !pic_offset_table_rtx
12975 || !REG_P (parts.base)
12976 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12977 cost++;
12979 if (parts.index
12980 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12981 && (current_pass->type == GIMPLE_PASS
12982 || !pic_offset_table_rtx
12983 || !REG_P (parts.index)
12984 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12985 cost++;
12987 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12988 since its predecode logic can't detect the length of instructions
12989 and it degenerates to vector decoding. Increase the cost of such
12990 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12991 to split such addresses or even refuse them altogether.
12993 The following addressing modes are affected:
12994 [base+scale*index]
12995 [scale*index+disp]
12996 [base+index]
12998 The first and last cases may be avoidable by explicitly coding the zero into
12999 the memory address, but I don't have an AMD-K6 machine handy to check this
13000 theory. */
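/* Editorial example (encoding details are standard x86, not from this file):
   [%ebx+%esi*4] with no displacement is encoded with mod=00 plus a SIB byte,
   i.e. ModR/M 00_xxx_100b, and so pays the penalty; writing it as
   [%ebx+%esi*4+0] with an explicit zero disp8 would avoid that encoding.  */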
13002 if (TARGET_K6
13003 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13004 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13005 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13006 cost += 10;
13008 return cost;
13011 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13012 this is used to form addresses to local data when -fPIC is in
13013 use. */
13015 static bool
13016 darwin_local_data_pic (rtx disp)
13018 return (GET_CODE (disp) == UNSPEC
13019 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13022 /* Determine if a given RTX is a valid constant. We already know this
13023 satisfies CONSTANT_P. */
13025 static bool
13026 ix86_legitimate_constant_p (machine_mode, rtx x)
13028 /* Pointer bounds constants are not valid. */
13029 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13030 return false;
13032 switch (GET_CODE (x))
13034 case CONST:
13035 x = XEXP (x, 0);
13037 if (GET_CODE (x) == PLUS)
13039 if (!CONST_INT_P (XEXP (x, 1)))
13040 return false;
13041 x = XEXP (x, 0);
13044 if (TARGET_MACHO && darwin_local_data_pic (x))
13045 return true;
13047 /* Only some unspecs are valid as "constants". */
13048 if (GET_CODE (x) == UNSPEC)
13049 switch (XINT (x, 1))
13051 case UNSPEC_GOT:
13052 case UNSPEC_GOTOFF:
13053 case UNSPEC_PLTOFF:
13054 return TARGET_64BIT;
13055 case UNSPEC_TPOFF:
13056 case UNSPEC_NTPOFF:
13057 x = XVECEXP (x, 0, 0);
13058 return (GET_CODE (x) == SYMBOL_REF
13059 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13060 case UNSPEC_DTPOFF:
13061 x = XVECEXP (x, 0, 0);
13062 return (GET_CODE (x) == SYMBOL_REF
13063 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13064 default:
13065 return false;
13068 /* We must have drilled down to a symbol. */
13069 if (GET_CODE (x) == LABEL_REF)
13070 return true;
13071 if (GET_CODE (x) != SYMBOL_REF)
13072 return false;
13073 /* FALLTHRU */
13075 case SYMBOL_REF:
13076 /* TLS symbols are never valid. */
13077 if (SYMBOL_REF_TLS_MODEL (x))
13078 return false;
13080 /* DLLIMPORT symbols are never valid. */
13081 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13082 && SYMBOL_REF_DLLIMPORT_P (x))
13083 return false;
13085 #if TARGET_MACHO
13086 /* mdynamic-no-pic */
13087 if (MACHO_DYNAMIC_NO_PIC_P)
13088 return machopic_symbol_defined_p (x);
13089 #endif
13090 break;
13092 case CONST_WIDE_INT:
13093 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13094 return false;
13095 break;
13097 case CONST_VECTOR:
13098 if (!standard_sse_constant_p (x))
13099 return false;
13101 default:
13102 break;
13105 /* Otherwise we handle everything else in the move patterns. */
13106 return true;
13109 /* Determine if it's legal to put X into the constant pool. This
13110 is not possible for the address of thread-local symbols, which
13111 is checked above. */
13113 static bool
13114 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13116 /* We can always put integral constants and vectors in memory. */
13117 switch (GET_CODE (x))
13119 case CONST_INT:
13120 case CONST_WIDE_INT:
13121 case CONST_DOUBLE:
13122 case CONST_VECTOR:
13123 return false;
13125 default:
13126 break;
13128 return !ix86_legitimate_constant_p (mode, x);
13131 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
13132 otherwise zero. */
13134 static bool
13135 is_imported_p (rtx x)
13137 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13138 || GET_CODE (x) != SYMBOL_REF)
13139 return false;
13141 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13145 /* Nonzero if the constant value X is a legitimate general operand
13146 when generating PIC code. It is given that flag_pic is on and
13147 that X satisfies CONSTANT_P. */
13149 bool
13150 legitimate_pic_operand_p (rtx x)
13152 rtx inner;
13154 switch (GET_CODE (x))
13156 case CONST:
13157 inner = XEXP (x, 0);
13158 if (GET_CODE (inner) == PLUS
13159 && CONST_INT_P (XEXP (inner, 1)))
13160 inner = XEXP (inner, 0);
13162 /* Only some unspecs are valid as "constants". */
13163 if (GET_CODE (inner) == UNSPEC)
13164 switch (XINT (inner, 1))
13166 case UNSPEC_GOT:
13167 case UNSPEC_GOTOFF:
13168 case UNSPEC_PLTOFF:
13169 return TARGET_64BIT;
13170 case UNSPEC_TPOFF:
13171 x = XVECEXP (inner, 0, 0);
13172 return (GET_CODE (x) == SYMBOL_REF
13173 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13174 case UNSPEC_MACHOPIC_OFFSET:
13175 return legitimate_pic_address_disp_p (x);
13176 default:
13177 return false;
13179 /* FALLTHRU */
13181 case SYMBOL_REF:
13182 case LABEL_REF:
13183 return legitimate_pic_address_disp_p (x);
13185 default:
13186 return true;
13190 /* Determine if a given CONST RTX is a valid memory displacement
13191 in PIC mode. */
13193 bool
13194 legitimate_pic_address_disp_p (rtx disp)
13196 bool saw_plus;
13198 /* In 64bit mode we can allow direct addresses of symbols and labels
13199 when they are not dynamic symbols. */
13200 if (TARGET_64BIT)
13202 rtx op0 = disp, op1;
13204 switch (GET_CODE (disp))
13206 case LABEL_REF:
13207 return true;
13209 case CONST:
13210 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13211 break;
13212 op0 = XEXP (XEXP (disp, 0), 0);
13213 op1 = XEXP (XEXP (disp, 0), 1);
13214 if (!CONST_INT_P (op1)
13215 || INTVAL (op1) >= 16*1024*1024
13216 || INTVAL (op1) < -16*1024*1024)
13217 break;
13218 if (GET_CODE (op0) == LABEL_REF)
13219 return true;
13220 if (GET_CODE (op0) == CONST
13221 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13222 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13223 return true;
13224 if (GET_CODE (op0) == UNSPEC
13225 && XINT (op0, 1) == UNSPEC_PCREL)
13226 return true;
13227 if (GET_CODE (op0) != SYMBOL_REF)
13228 break;
13229 /* FALLTHRU */
13231 case SYMBOL_REF:
13232 /* TLS references should always be enclosed in UNSPEC.
13233 The dllimported symbol always needs to be resolved. */
13234 if (SYMBOL_REF_TLS_MODEL (op0)
13235 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13236 return false;
13238 if (TARGET_PECOFF)
13240 if (is_imported_p (op0))
13241 return true;
13243 if (SYMBOL_REF_FAR_ADDR_P (op0)
13244 || !SYMBOL_REF_LOCAL_P (op0))
13245 break;
13247 /* Function symbols need to be resolved only for
13248 the large model.
13249 For the small model we don't need to resolve anything
13250 here. */
13251 if ((ix86_cmodel != CM_LARGE_PIC
13252 && SYMBOL_REF_FUNCTION_P (op0))
13253 || ix86_cmodel == CM_SMALL_PIC)
13254 return true;
13255 /* Non-external symbols don't need to be resolved for
13256 the large and medium models. */
13257 if ((ix86_cmodel == CM_LARGE_PIC
13258 || ix86_cmodel == CM_MEDIUM_PIC)
13259 && !SYMBOL_REF_EXTERNAL_P (op0))
13260 return true;
13262 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13263 && (SYMBOL_REF_LOCAL_P (op0)
13264 || (HAVE_LD_PIE_COPYRELOC
13265 && flag_pie
13266 && !SYMBOL_REF_WEAK (op0)
13267 && !SYMBOL_REF_FUNCTION_P (op0)))
13268 && ix86_cmodel != CM_LARGE_PIC)
13269 return true;
13270 break;
13272 default:
13273 break;
13276 if (GET_CODE (disp) != CONST)
13277 return false;
13278 disp = XEXP (disp, 0);
13280 if (TARGET_64BIT)
13282 /* It is unsafe to allow PLUS expressions here. This limits the allowed
13283 distance of GOT tables. We should not need these anyway. */
13284 if (GET_CODE (disp) != UNSPEC
13285 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13286 && XINT (disp, 1) != UNSPEC_GOTOFF
13287 && XINT (disp, 1) != UNSPEC_PCREL
13288 && XINT (disp, 1) != UNSPEC_PLTOFF))
13289 return false;
13291 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13292 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13293 return false;
13294 return true;
13297 saw_plus = false;
13298 if (GET_CODE (disp) == PLUS)
13300 if (!CONST_INT_P (XEXP (disp, 1)))
13301 return false;
13302 disp = XEXP (disp, 0);
13303 saw_plus = true;
13306 if (TARGET_MACHO && darwin_local_data_pic (disp))
13307 return true;
13309 if (GET_CODE (disp) != UNSPEC)
13310 return false;
13312 switch (XINT (disp, 1))
13314 case UNSPEC_GOT:
13315 if (saw_plus)
13316 return false;
13317 /* We need to check for both symbols and labels because VxWorks loads
13318 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13319 details. */
13320 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13321 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13322 case UNSPEC_GOTOFF:
13323 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13324 While the ABI also specifies a 32bit relocation, we don't produce it in
13325 the small PIC model at all. */
13326 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13327 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13328 && !TARGET_64BIT)
13329 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13330 return false;
13331 case UNSPEC_GOTTPOFF:
13332 case UNSPEC_GOTNTPOFF:
13333 case UNSPEC_INDNTPOFF:
13334 if (saw_plus)
13335 return false;
13336 disp = XVECEXP (disp, 0, 0);
13337 return (GET_CODE (disp) == SYMBOL_REF
13338 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13339 case UNSPEC_NTPOFF:
13340 disp = XVECEXP (disp, 0, 0);
13341 return (GET_CODE (disp) == SYMBOL_REF
13342 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13343 case UNSPEC_DTPOFF:
13344 disp = XVECEXP (disp, 0, 0);
13345 return (GET_CODE (disp) == SYMBOL_REF
13346 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13349 return false;
13352 /* Determine if op is a suitable RTX for an address register.
13353 Return the naked register if a register or a register subreg is
13354 found, otherwise return NULL_RTX. */
13356 static rtx
13357 ix86_validate_address_register (rtx op)
13359 machine_mode mode = GET_MODE (op);
13361 /* Only SImode or DImode registers can form the address. */
13362 if (mode != SImode && mode != DImode)
13363 return NULL_RTX;
13365 if (REG_P (op))
13366 return op;
13367 else if (GET_CODE (op) == SUBREG)
13369 rtx reg = SUBREG_REG (op);
13371 if (!REG_P (reg))
13372 return NULL_RTX;
13374 mode = GET_MODE (reg);
13376 /* Don't allow SUBREGs that span more than a word. It can
13377 lead to spill failures when the register is one word out
13378 of a two word structure. */
13379 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13380 return NULL_RTX;
13382 /* Allow only SUBREGs of non-eliminable hard registers. */
13383 if (register_no_elim_operand (reg, mode))
13384 return reg;
13387 /* Op is not a register. */
13388 return NULL_RTX;
13391 /* Recognizes RTL expressions that are valid memory addresses for an
13392 instruction. The MODE argument is the machine mode for the MEM
13393 expression that wants to use this address.
13395 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13396 convert common non-canonical forms to canonical form so that they will
13397 be recognized. */
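/* Editorial summary (restating the checks below): a valid address here is
   base + index*scale + disp with SImode/DImode registers, scale in
   {1, 2, 4, 8}, an optional segment override, and a displacement that is an
   in-range constant or one of the allowed PIC/TLS unspecs.  */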
13399 static bool
13400 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13402 struct ix86_address parts;
13403 rtx base, index, disp;
13404 HOST_WIDE_INT scale;
13405 enum ix86_address_seg seg;
13407 if (ix86_decompose_address (addr, &parts) <= 0)
13408 /* Decomposition failed. */
13409 return false;
13411 base = parts.base;
13412 index = parts.index;
13413 disp = parts.disp;
13414 scale = parts.scale;
13415 seg = parts.seg;
13417 /* Validate base register. */
13418 if (base)
13420 rtx reg = ix86_validate_address_register (base);
13422 if (reg == NULL_RTX)
13423 return false;
13425 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13426 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13427 /* Base is not valid. */
13428 return false;
13431 /* Validate index register. */
13432 if (index)
13434 rtx reg = ix86_validate_address_register (index);
13436 if (reg == NULL_RTX)
13437 return false;
13439 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13440 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13441 /* Index is not valid. */
13442 return false;
13445 /* Index and base should have the same mode. */
13446 if (base && index
13447 && GET_MODE (base) != GET_MODE (index))
13448 return false;
13450 /* Address override works only on the (%reg) part of %fs:(%reg). */
13451 if (seg != SEG_DEFAULT
13452 && ((base && GET_MODE (base) != word_mode)
13453 || (index && GET_MODE (index) != word_mode)))
13454 return false;
13456 /* Validate scale factor. */
13457 if (scale != 1)
13459 if (!index)
13460 /* Scale without index. */
13461 return false;
13463 if (scale != 2 && scale != 4 && scale != 8)
13464 /* Scale is not a valid multiplier. */
13465 return false;
13468 /* Validate displacement. */
13469 if (disp)
13471 if (GET_CODE (disp) == CONST
13472 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13473 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13474 switch (XINT (XEXP (disp, 0), 1))
13476 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13477 used. While the ABI also specifies 32bit relocations, we don't produce
13478 them at all and use IP-relative addressing instead. */
13479 case UNSPEC_GOT:
13480 case UNSPEC_GOTOFF:
13481 gcc_assert (flag_pic);
13482 if (!TARGET_64BIT)
13483 goto is_legitimate_pic;
13485 /* 64bit address unspec. */
13486 return false;
13488 case UNSPEC_GOTPCREL:
13489 case UNSPEC_PCREL:
13490 gcc_assert (flag_pic);
13491 goto is_legitimate_pic;
13493 case UNSPEC_GOTTPOFF:
13494 case UNSPEC_GOTNTPOFF:
13495 case UNSPEC_INDNTPOFF:
13496 case UNSPEC_NTPOFF:
13497 case UNSPEC_DTPOFF:
13498 break;
13500 case UNSPEC_STACK_CHECK:
13501 gcc_assert (flag_split_stack);
13502 break;
13504 default:
13505 /* Invalid address unspec. */
13506 return false;
13509 else if (SYMBOLIC_CONST (disp)
13510 && (flag_pic
13511 || (TARGET_MACHO
13512 #if TARGET_MACHO
13513 && MACHOPIC_INDIRECT
13514 && !machopic_operand_p (disp)
13515 #endif
13519 is_legitimate_pic:
13520 if (TARGET_64BIT && (index || base))
13522 /* foo@dtpoff(%rX) is ok. */
13523 if (GET_CODE (disp) != CONST
13524 || GET_CODE (XEXP (disp, 0)) != PLUS
13525 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13526 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13527 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13528 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13529 /* Non-constant pic memory reference. */
13530 return false;
13532 else if ((!TARGET_MACHO || flag_pic)
13533 && ! legitimate_pic_address_disp_p (disp))
13534 /* Displacement is an invalid pic construct. */
13535 return false;
13536 #if TARGET_MACHO
13537 else if (MACHO_DYNAMIC_NO_PIC_P
13538 && !ix86_legitimate_constant_p (Pmode, disp))
13539 /* displacement must be referenced via non_lazy_pointer */
13540 return false;
13541 #endif
13543 /* This code used to verify that a symbolic pic displacement
13544 includes the pic_offset_table_rtx register.
13546 While this is a good idea, unfortunately these constructs may
13547 be created by the "adds using lea" optimization for incorrect
13548 code like:
13550 int a;
13551 int foo(int i)
13553 return *(&a+i);
13556 This code is nonsensical, but results in addressing the
13557 GOT table with a pic_offset_table_rtx base. We can't
13558 just refuse it easily, since it gets matched by the
13559 "addsi3" pattern, which later gets split to lea in the
13560 case where the output register differs from the input. While this
13561 could be handled by a separate addsi pattern for this case
13562 that never results in lea, disabling this test seems to be the
13563 easier and correct fix for the crash. */
13565 else if (GET_CODE (disp) != LABEL_REF
13566 && !CONST_INT_P (disp)
13567 && (GET_CODE (disp) != CONST
13568 || !ix86_legitimate_constant_p (Pmode, disp))
13569 && (GET_CODE (disp) != SYMBOL_REF
13570 || !ix86_legitimate_constant_p (Pmode, disp)))
13571 /* Displacement is not constant. */
13572 return false;
13573 else if (TARGET_64BIT
13574 && !x86_64_immediate_operand (disp, VOIDmode))
13575 /* Displacement is out of range. */
13576 return false;
13577 /* In x32 mode, constant addresses are sign extended to 64bit, so
13578 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13579 else if (TARGET_X32 && !(index || base)
13580 && CONST_INT_P (disp)
13581 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13582 return false;
13585 /* Everything looks valid. */
13586 return true;
13589 /* Determine if a given RTX is a valid constant address. */
13591 bool
13592 constant_address_p (rtx x)
13594 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13597 /* Return a unique alias set for the GOT. */
13599 static alias_set_type
13600 ix86_GOT_alias_set (void)
13602 static alias_set_type set = -1;
13603 if (set == -1)
13604 set = new_alias_set ();
13605 return set;
13608 /* Return a legitimate reference for ORIG (an address) using the
13609 register REG. If REG is 0, a new pseudo is generated.
13611 There are two types of references that must be handled:
13613 1. Global data references must load the address from the GOT, via
13614 the PIC reg. An insn is emitted to do this load, and the reg is
13615 returned.
13617 2. Static data references, constant pool addresses, and code labels
13618 compute the address as an offset from the GOT, whose base is in
13619 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13620 differentiate them from global data objects. The returned
13621 address is the PIC reg + an unspec constant.
13623 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13624 reg also appears in the address. */
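/* Editorial illustration (shapes assumed from the code below): a global
   symbol is typically rewritten as a GOT load,
       (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
   while a local symbol becomes a GOT-relative offset from the PIC register,
       (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
   and 64-bit small-model code uses a UNSPEC_GOTPCREL memory reference
   instead.  */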
13626 static rtx
13627 legitimize_pic_address (rtx orig, rtx reg)
13629 rtx addr = orig;
13630 rtx new_rtx = orig;
13632 #if TARGET_MACHO
13633 if (TARGET_MACHO && !TARGET_64BIT)
13635 if (reg == 0)
13636 reg = gen_reg_rtx (Pmode);
13637 /* Use the generic Mach-O PIC machinery. */
13638 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13640 #endif
13642 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13644 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13645 if (tmp)
13646 return tmp;
13649 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13650 new_rtx = addr;
13651 else if (TARGET_64BIT && !TARGET_PECOFF
13652 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13654 rtx tmpreg;
13655 /* This symbol may be referenced via a displacement from the PIC
13656 base address (@GOTOFF). */
13658 if (GET_CODE (addr) == CONST)
13659 addr = XEXP (addr, 0);
13660 if (GET_CODE (addr) == PLUS)
13662 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13663 UNSPEC_GOTOFF);
13664 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13666 else
13667 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13668 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13669 if (!reg)
13670 tmpreg = gen_reg_rtx (Pmode);
13671 else
13672 tmpreg = reg;
13673 emit_move_insn (tmpreg, new_rtx);
13675 if (reg != 0)
13677 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13678 tmpreg, 1, OPTAB_DIRECT);
13679 new_rtx = reg;
13681 else
13682 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13684 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13686 /* This symbol may be referenced via a displacement from the PIC
13687 base address (@GOTOFF). */
13689 if (GET_CODE (addr) == CONST)
13690 addr = XEXP (addr, 0);
13691 if (GET_CODE (addr) == PLUS)
13693 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13694 UNSPEC_GOTOFF);
13695 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13697 else
13698 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13699 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13700 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13702 if (reg != 0)
13704 emit_move_insn (reg, new_rtx);
13705 new_rtx = reg;
13708 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13709 /* We can't use @GOTOFF for text labels on VxWorks;
13710 see gotoff_operand. */
13711 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13713 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13714 if (tmp)
13715 return tmp;
13717 /* For x64 PE-COFF there is no GOT table, so we use the address
13718 directly. */
13719 if (TARGET_64BIT && TARGET_PECOFF)
13721 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13722 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13724 if (reg == 0)
13725 reg = gen_reg_rtx (Pmode);
13726 emit_move_insn (reg, new_rtx);
13727 new_rtx = reg;
13729 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13731 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13732 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13733 new_rtx = gen_const_mem (Pmode, new_rtx);
13734 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13736 if (reg == 0)
13737 reg = gen_reg_rtx (Pmode);
13738 /* Use gen_movsi directly, otherwise the address is loaded
13739 into a register for CSE. We don't want to CSE these addresses;
13740 instead we CSE addresses from the GOT table, so skip this. */
13741 emit_insn (gen_movsi (reg, new_rtx));
13742 new_rtx = reg;
13744 else
13746 /* This symbol must be referenced via a load from the
13747 Global Offset Table (@GOT). */
13749 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13750 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13751 if (TARGET_64BIT)
13752 new_rtx = force_reg (Pmode, new_rtx);
13753 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13754 new_rtx = gen_const_mem (Pmode, new_rtx);
13755 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13757 if (reg == 0)
13758 reg = gen_reg_rtx (Pmode);
13759 emit_move_insn (reg, new_rtx);
13760 new_rtx = reg;
13763 else
13765 if (CONST_INT_P (addr)
13766 && !x86_64_immediate_operand (addr, VOIDmode))
13768 if (reg)
13770 emit_move_insn (reg, addr);
13771 new_rtx = reg;
13773 else
13774 new_rtx = force_reg (Pmode, addr);
13776 else if (GET_CODE (addr) == CONST)
13778 addr = XEXP (addr, 0);
13780 /* We must match stuff we generate before. Assume the only
13781 unspecs that can get here are ours. Not that we could do
13782 anything with them anyway.... */
13783 if (GET_CODE (addr) == UNSPEC
13784 || (GET_CODE (addr) == PLUS
13785 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13786 return orig;
13787 gcc_assert (GET_CODE (addr) == PLUS);
13789 if (GET_CODE (addr) == PLUS)
13791 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13793 /* Check first to see if this is a constant offset from a @GOTOFF
13794 symbol reference. */
13795 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13796 && CONST_INT_P (op1))
13798 if (!TARGET_64BIT)
13800 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13801 UNSPEC_GOTOFF);
13802 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13803 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13804 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13806 if (reg != 0)
13808 emit_move_insn (reg, new_rtx);
13809 new_rtx = reg;
13812 else
13814 if (INTVAL (op1) < -16*1024*1024
13815 || INTVAL (op1) >= 16*1024*1024)
13817 if (!x86_64_immediate_operand (op1, Pmode))
13818 op1 = force_reg (Pmode, op1);
13819 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13823 else
13825 rtx base = legitimize_pic_address (op0, reg);
13826 machine_mode mode = GET_MODE (base);
13827 new_rtx
13828 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13830 if (CONST_INT_P (new_rtx))
13832 if (INTVAL (new_rtx) < -16*1024*1024
13833 || INTVAL (new_rtx) >= 16*1024*1024)
13835 if (!x86_64_immediate_operand (new_rtx, mode))
13836 new_rtx = force_reg (mode, new_rtx);
13837 new_rtx
13838 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13840 else
13841 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13843 else
13845 /* For %rip addressing, we have to use just disp32, with
13846 neither base nor index. */
13847 if (TARGET_64BIT
13848 && (GET_CODE (base) == SYMBOL_REF
13849 || GET_CODE (base) == LABEL_REF))
13850 base = force_reg (mode, base);
13851 if (GET_CODE (new_rtx) == PLUS
13852 && CONSTANT_P (XEXP (new_rtx, 1)))
13854 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13855 new_rtx = XEXP (new_rtx, 1);
13857 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13862 return new_rtx;
13865 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13867 static rtx
13868 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13870 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13872 if (GET_MODE (tp) != tp_mode)
13874 gcc_assert (GET_MODE (tp) == SImode);
13875 gcc_assert (tp_mode == DImode);
13877 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13880 if (to_reg)
13881 tp = copy_to_mode_reg (tp_mode, tp);
13883 return tp;
13886 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13888 static GTY(()) rtx ix86_tls_symbol;
13890 static rtx
13891 ix86_tls_get_addr (void)
13893 if (!ix86_tls_symbol)
13895 const char *sym
13896 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13897 ? "___tls_get_addr" : "__tls_get_addr");
13899 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13902 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13904 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13905 UNSPEC_PLTOFF);
13906 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13907 gen_rtx_CONST (Pmode, unspec));
13910 return ix86_tls_symbol;
13913 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13915 static GTY(()) rtx ix86_tls_module_base_symbol;
13917 rtx
13918 ix86_tls_module_base (void)
13920 if (!ix86_tls_module_base_symbol)
13922 ix86_tls_module_base_symbol
13923 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13925 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13926 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13929 return ix86_tls_module_base_symbol;
13932 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13933 false if we expect this to be used for a memory address and true if
13934 we expect to load the address into a register. */
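/* Editorial sketch (assumed from the TLS_MODEL_LOCAL_EXEC case below): with
   GNU TLS the local-exec result has the form
       (plus (unspec [(const_int 0)] UNSPEC_TP)
             (const (unspec [sym] UNSPEC_NTPOFF)))
   i.e. the thread pointer plus a link-time-constant offset; the dynamic
   models instead go through the GOT or call __tls_get_addr.  */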
13936 static rtx
13937 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13939 rtx dest, base, off;
13940 rtx pic = NULL_RTX, tp = NULL_RTX;
13941 machine_mode tp_mode = Pmode;
13942 int type;
13944 /* Fall back to the global dynamic model if the tool chain cannot support local
13945 dynamic. */
13946 if (TARGET_SUN_TLS && !TARGET_64BIT
13947 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13948 && model == TLS_MODEL_LOCAL_DYNAMIC)
13949 model = TLS_MODEL_GLOBAL_DYNAMIC;
13951 switch (model)
13953 case TLS_MODEL_GLOBAL_DYNAMIC:
13954 dest = gen_reg_rtx (Pmode);
13956 if (!TARGET_64BIT)
13958 if (flag_pic && !TARGET_PECOFF)
13959 pic = pic_offset_table_rtx;
13960 else
13962 pic = gen_reg_rtx (Pmode);
13963 emit_insn (gen_set_got (pic));
13967 if (TARGET_GNU2_TLS)
13969 if (TARGET_64BIT)
13970 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13971 else
13972 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13974 tp = get_thread_pointer (Pmode, true);
13975 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13977 if (GET_MODE (x) != Pmode)
13978 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13980 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13982 else
13984 rtx caddr = ix86_tls_get_addr ();
13986 if (TARGET_64BIT)
13988 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13989 rtx_insn *insns;
13991 start_sequence ();
13992 emit_call_insn
13993 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13994 insns = get_insns ();
13995 end_sequence ();
13997 if (GET_MODE (x) != Pmode)
13998 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14000 RTL_CONST_CALL_P (insns) = 1;
14001 emit_libcall_block (insns, dest, rax, x);
14003 else
14004 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14006 break;
14008 case TLS_MODEL_LOCAL_DYNAMIC:
14009 base = gen_reg_rtx (Pmode);
14011 if (!TARGET_64BIT)
14013 if (flag_pic)
14014 pic = pic_offset_table_rtx;
14015 else
14017 pic = gen_reg_rtx (Pmode);
14018 emit_insn (gen_set_got (pic));
14022 if (TARGET_GNU2_TLS)
14024 rtx tmp = ix86_tls_module_base ();
14026 if (TARGET_64BIT)
14027 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14028 else
14029 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14031 tp = get_thread_pointer (Pmode, true);
14032 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14033 gen_rtx_MINUS (Pmode, tmp, tp));
14035 else
14037 rtx caddr = ix86_tls_get_addr ();
14039 if (TARGET_64BIT)
14041 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14042 rtx_insn *insns;
14043 rtx eqv;
14045 start_sequence ();
14046 emit_call_insn
14047 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14048 insns = get_insns ();
14049 end_sequence ();
14051 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14052 share the LD_BASE result with other LD model accesses. */
14053 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14054 UNSPEC_TLS_LD_BASE);
14056 RTL_CONST_CALL_P (insns) = 1;
14057 emit_libcall_block (insns, base, rax, eqv);
14059 else
14060 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14063 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14064 off = gen_rtx_CONST (Pmode, off);
14066 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14068 if (TARGET_GNU2_TLS)
14070 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14072 if (GET_MODE (x) != Pmode)
14073 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14075 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14077 break;
14079 case TLS_MODEL_INITIAL_EXEC:
14080 if (TARGET_64BIT)
14082 if (TARGET_SUN_TLS && !TARGET_X32)
14084 /* The Sun linker took the AMD64 TLS spec literally
14085 and can only handle %rax as the destination of the
14086 initial-exec code sequence. */
14088 dest = gen_reg_rtx (DImode);
14089 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14090 return dest;
14093 /* Generate DImode references to avoid %fs:(%reg32)
14094 problems and the linker IE->LE relaxation bug. */
14095 tp_mode = DImode;
14096 pic = NULL;
14097 type = UNSPEC_GOTNTPOFF;
14099 else if (flag_pic)
14101 pic = pic_offset_table_rtx;
14102 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14104 else if (!TARGET_ANY_GNU_TLS)
14106 pic = gen_reg_rtx (Pmode);
14107 emit_insn (gen_set_got (pic));
14108 type = UNSPEC_GOTTPOFF;
14110 else
14112 pic = NULL;
14113 type = UNSPEC_INDNTPOFF;
14116 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14117 off = gen_rtx_CONST (tp_mode, off);
14118 if (pic)
14119 off = gen_rtx_PLUS (tp_mode, pic, off);
14120 off = gen_const_mem (tp_mode, off);
14121 set_mem_alias_set (off, ix86_GOT_alias_set ());
14123 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14125 base = get_thread_pointer (tp_mode,
14126 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14127 off = force_reg (tp_mode, off);
14128 return gen_rtx_PLUS (tp_mode, base, off);
14130 else
14132 base = get_thread_pointer (Pmode, true);
14133 dest = gen_reg_rtx (Pmode);
14134 emit_insn (ix86_gen_sub3 (dest, base, off));
14136 break;
14138 case TLS_MODEL_LOCAL_EXEC:
14139 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14140 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14141 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14142 off = gen_rtx_CONST (Pmode, off);
14144 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14146 base = get_thread_pointer (Pmode,
14147 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14148 return gen_rtx_PLUS (Pmode, base, off);
14150 else
14152 base = get_thread_pointer (Pmode, true);
14153 dest = gen_reg_rtx (Pmode);
14154 emit_insn (ix86_gen_sub3 (dest, base, off));
14156 break;
14158 default:
14159 gcc_unreachable ();
14162 return dest;
14165 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14166 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14167 unique refptr-DECL symbol corresponding to symbol DECL. */
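/* Editorial illustration (the decl name "foo" is hypothetical): for a
   dllimported "foo" the reference is rewritten as a load from the import
   slot, roughly (mem (symbol_ref "__imp__foo")) (or "__imp_foo" when there
   is no user label prefix), as set up by get_dllimport_decl below.  */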
14169 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14171 static inline hashval_t hash (tree_map *m) { return m->hash; }
14172 static inline bool
14173 equal (tree_map *a, tree_map *b)
14175 return a->base.from == b->base.from;
14178 static void
14179 handle_cache_entry (tree_map *&m)
14181 extern void gt_ggc_mx (tree_map *&);
14182 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14183 return;
14184 else if (ggc_marked_p (m->base.from))
14185 gt_ggc_mx (m);
14186 else
14187 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14191 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14193 static tree
14194 get_dllimport_decl (tree decl, bool beimport)
14196 struct tree_map *h, in;
14197 const char *name;
14198 const char *prefix;
14199 size_t namelen, prefixlen;
14200 char *imp_name;
14201 tree to;
14202 rtx rtl;
14204 if (!dllimport_map)
14205 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14207 in.hash = htab_hash_pointer (decl);
14208 in.base.from = decl;
14209 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14210 h = *loc;
14211 if (h)
14212 return h->to;
14214 *loc = h = ggc_alloc<tree_map> ();
14215 h->hash = in.hash;
14216 h->base.from = decl;
14217 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14218 VAR_DECL, NULL, ptr_type_node);
14219 DECL_ARTIFICIAL (to) = 1;
14220 DECL_IGNORED_P (to) = 1;
14221 DECL_EXTERNAL (to) = 1;
14222 TREE_READONLY (to) = 1;
14224 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14225 name = targetm.strip_name_encoding (name);
14226 if (beimport)
14227 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14228 ? "*__imp_" : "*__imp__";
14229 else
14230 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14231 namelen = strlen (name);
14232 prefixlen = strlen (prefix);
14233 imp_name = (char *) alloca (namelen + prefixlen + 1);
14234 memcpy (imp_name, prefix, prefixlen);
14235 memcpy (imp_name + prefixlen, name, namelen + 1);
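  /* For example, with an empty user label prefix a dllimport reference to
     foo gets the stub name "*__imp_foo" and an extern refptr stub gets
     "*.refptr.foo"; with a leading-underscore ABI the "*__imp__" and
     "*refptr." variants above are chosen instead.  */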
14237 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14238 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14239 SET_SYMBOL_REF_DECL (rtl, to);
14240 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14241 if (!beimport)
14243 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14244 #ifdef SUB_TARGET_RECORD_STUB
14245 SUB_TARGET_RECORD_STUB (name);
14246 #endif
14249 rtl = gen_const_mem (Pmode, rtl);
14250 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14252 SET_DECL_RTL (to, rtl);
14253 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14255 return to;
14258 /* Expand SYMBOL into its corresponding far-addressed symbol.
14259 WANT_REG is true if we require the result be a register. */
14261 static rtx
14262 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14264 tree imp_decl;
14265 rtx x;
14267 gcc_assert (SYMBOL_REF_DECL (symbol));
14268 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14270 x = DECL_RTL (imp_decl);
14271 if (want_reg)
14272 x = force_reg (Pmode, x);
14273 return x;
14276 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14277 true if we require the result be a register. */
14279 static rtx
14280 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14282 tree imp_decl;
14283 rtx x;
14285 gcc_assert (SYMBOL_REF_DECL (symbol));
14286 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14288 x = DECL_RTL (imp_decl);
14289 if (want_reg)
14290 x = force_reg (Pmode, x);
14291 return x;
14294 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14295 is true if we require the result be a register. */
14297 static rtx
14298 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14300 if (!TARGET_PECOFF)
14301 return NULL_RTX;
14303 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14305 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14306 return legitimize_dllimport_symbol (addr, inreg);
14307 if (GET_CODE (addr) == CONST
14308 && GET_CODE (XEXP (addr, 0)) == PLUS
14309 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14310 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14312 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14313 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14317 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14318 return NULL_RTX;
14319 if (GET_CODE (addr) == SYMBOL_REF
14320 && !is_imported_p (addr)
14321 && SYMBOL_REF_EXTERNAL_P (addr)
14322 && SYMBOL_REF_DECL (addr))
14323 return legitimize_pe_coff_extern_decl (addr, inreg);
14325 if (GET_CODE (addr) == CONST
14326 && GET_CODE (XEXP (addr, 0)) == PLUS
14327 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14328 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14329 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14330 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14332 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14333 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14335 return NULL_RTX;
14338 /* Try machine-dependent ways of modifying an illegitimate address
14339 to be legitimate. If we find one, return the new, valid address.
14340 This macro is used in only one place: `memory_address' in explow.c.
14342 OLDX is the address as it was before break_out_memory_refs was called.
14343 In some cases it is useful to look at this to decide what needs to be done.
14345 It is always safe for this macro to do nothing. It exists to recognize
14346 opportunities to optimize the output.
14348 For the 80386, we handle X+REG by loading X into a register R and
14349 using R+REG. R will go in a general reg and indexing will be used.
14350 However, if REG is a broken-out memory address or multiplication,
14351 nothing needs to be done because REG can certainly go in a general reg.
14353 When -fpic is used, special handling is needed for symbolic references.
14354 See comments by legitimize_pic_address in i386.c for details. */
14356 static rtx
14357 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14359 bool changed = false;
14360 unsigned log;
14362 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14363 if (log)
14364 return legitimize_tls_address (x, (enum tls_model) log, false);
14365 if (GET_CODE (x) == CONST
14366 && GET_CODE (XEXP (x, 0)) == PLUS
14367 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14368 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14370 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14371 (enum tls_model) log, false);
14372 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14375 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14377 rtx tmp = legitimize_pe_coff_symbol (x, true);
14378 if (tmp)
14379 return tmp;
14382 if (flag_pic && SYMBOLIC_CONST (x))
14383 return legitimize_pic_address (x, 0);
14385 #if TARGET_MACHO
14386 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14387 return machopic_indirect_data_reference (x, 0);
14388 #endif
14390 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14391 if (GET_CODE (x) == ASHIFT
14392 && CONST_INT_P (XEXP (x, 1))
14393 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14395 changed = true;
14396 log = INTVAL (XEXP (x, 1));
14397 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14398 GEN_INT (1 << log));
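  /* E.g. (ashift r 3) is rewritten here as (mult r 8), matching the
     scaled-index forms that x86 addressing accepts.  */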
14401 if (GET_CODE (x) == PLUS)
14403 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14405 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14406 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14407 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14409 changed = true;
14410 log = INTVAL (XEXP (XEXP (x, 0), 1));
14411 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14412 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14413 GEN_INT (1 << log));
14416 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14417 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14418 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14420 changed = true;
14421 log = INTVAL (XEXP (XEXP (x, 1), 1));
14422 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14423 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14424 GEN_INT (1 << log));
14427 /* Put multiply first if it isn't already. */
14428 if (GET_CODE (XEXP (x, 1)) == MULT)
14430 std::swap (XEXP (x, 0), XEXP (x, 1));
14431 changed = true;
14434 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14435 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14436 created by virtual register instantiation, register elimination, and
14437 similar optimizations. */
14438 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14440 changed = true;
14441 x = gen_rtx_PLUS (Pmode,
14442 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14443 XEXP (XEXP (x, 1), 0)),
14444 XEXP (XEXP (x, 1), 1));
14447 /* Canonicalize
14448 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14449 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14450 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14451 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14452 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14453 && CONSTANT_P (XEXP (x, 1)))
14455 rtx constant;
14456 rtx other = NULL_RTX;
14458 if (CONST_INT_P (XEXP (x, 1)))
14460 constant = XEXP (x, 1);
14461 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14463 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14465 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14466 other = XEXP (x, 1);
14468 else
14469 constant = 0;
14471 if (constant)
14473 changed = true;
14474 x = gen_rtx_PLUS (Pmode,
14475 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14476 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14477 plus_constant (Pmode, other,
14478 INTVAL (constant)));
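  /* For instance, (plus (plus (mult r1 4) (plus r2 8)) 16) becomes
     (plus (plus (mult r1 4) r2) 24), which fits the
     base + index*scale + disp form.  */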
14482 if (changed && ix86_legitimate_address_p (mode, x, false))
14483 return x;
14485 if (GET_CODE (XEXP (x, 0)) == MULT)
14487 changed = true;
14488 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14491 if (GET_CODE (XEXP (x, 1)) == MULT)
14493 changed = true;
14494 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14497 if (changed
14498 && REG_P (XEXP (x, 1))
14499 && REG_P (XEXP (x, 0)))
14500 return x;
14502 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14504 changed = true;
14505 x = legitimize_pic_address (x, 0);
14508 if (changed && ix86_legitimate_address_p (mode, x, false))
14509 return x;
14511 if (REG_P (XEXP (x, 0)))
14513 rtx temp = gen_reg_rtx (Pmode);
14514 rtx val = force_operand (XEXP (x, 1), temp);
14515 if (val != temp)
14517 val = convert_to_mode (Pmode, val, 1);
14518 emit_move_insn (temp, val);
14521 XEXP (x, 1) = temp;
14522 return x;
14525 else if (REG_P (XEXP (x, 1)))
14527 rtx temp = gen_reg_rtx (Pmode);
14528 rtx val = force_operand (XEXP (x, 0), temp);
14529 if (val != temp)
14531 val = convert_to_mode (Pmode, val, 1);
14532 emit_move_insn (temp, val);
14535 XEXP (x, 0) = temp;
14536 return x;
14540 return x;
14543 /* Print an integer constant expression in assembler syntax. Addition
14544 and subtraction are the only arithmetic that may appear in these
14545 expressions. FILE is the stdio stream to write to, X is the rtx, and
14546 CODE is the operand print code from the output string. */
14548 static void
14549 output_pic_addr_const (FILE *file, rtx x, int code)
14551 char buf[256];
14553 switch (GET_CODE (x))
14555 case PC:
14556 gcc_assert (flag_pic);
14557 putc ('.', file);
14558 break;
14560 case SYMBOL_REF:
14561 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14562 output_addr_const (file, x);
14563 else
14565 const char *name = XSTR (x, 0);
14567 /* Mark the decl as referenced so that cgraph will
14568 output the function. */
14569 if (SYMBOL_REF_DECL (x))
14570 mark_decl_referenced (SYMBOL_REF_DECL (x));
14572 #if TARGET_MACHO
14573 if (MACHOPIC_INDIRECT
14574 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14575 name = machopic_indirection_name (x, /*stub_p=*/true);
14576 #endif
14577 assemble_name (file, name);
14579 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14580 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14581 fputs ("@PLT", file);
14582 break;
14584 case LABEL_REF:
14585 x = XEXP (x, 0);
14586 /* FALLTHRU */
14587 case CODE_LABEL:
14588 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14589 assemble_name (asm_out_file, buf);
14590 break;
14592 case CONST_INT:
14593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14594 break;
14596 case CONST:
14597 /* This used to output parentheses around the expression,
14598 but that does not work on the 386 (either ATT or BSD assembler). */
14599 output_pic_addr_const (file, XEXP (x, 0), code);
14600 break;
14602 case CONST_DOUBLE:
14603 /* We can't handle floating point constants;
14604 TARGET_PRINT_OPERAND must handle them. */
14605 output_operand_lossage ("floating constant misused");
14606 break;
14608 case PLUS:
14609 /* Some assemblers need integer constants to appear first. */
14610 if (CONST_INT_P (XEXP (x, 0)))
14612 output_pic_addr_const (file, XEXP (x, 0), code);
14613 putc ('+', file);
14614 output_pic_addr_const (file, XEXP (x, 1), code);
14616 else
14618 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14619 output_pic_addr_const (file, XEXP (x, 1), code);
14620 putc ('+', file);
14621 output_pic_addr_const (file, XEXP (x, 0), code);
14623 break;
14625 case MINUS:
14626 if (!TARGET_MACHO)
14627 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14628 output_pic_addr_const (file, XEXP (x, 0), code);
14629 putc ('-', file);
14630 output_pic_addr_const (file, XEXP (x, 1), code);
14631 if (!TARGET_MACHO)
14632 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14633 break;
14635 case UNSPEC:
14636 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14638 bool f = i386_asm_output_addr_const_extra (file, x);
14639 gcc_assert (f);
14640 break;
14643 gcc_assert (XVECLEN (x, 0) == 1);
14644 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14645 switch (XINT (x, 1))
14647 case UNSPEC_GOT:
14648 fputs ("@GOT", file);
14649 break;
14650 case UNSPEC_GOTOFF:
14651 fputs ("@GOTOFF", file);
14652 break;
14653 case UNSPEC_PLTOFF:
14654 fputs ("@PLTOFF", file);
14655 break;
14656 case UNSPEC_PCREL:
14657 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14658 "(%rip)" : "[rip]", file);
14659 break;
14660 case UNSPEC_GOTPCREL:
14661 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14662 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14663 break;
14664 case UNSPEC_GOTTPOFF:
14665 /* FIXME: This might be @TPOFF in Sun ld too. */
14666 fputs ("@gottpoff", file);
14667 break;
14668 case UNSPEC_TPOFF:
14669 fputs ("@tpoff", file);
14670 break;
14671 case UNSPEC_NTPOFF:
14672 if (TARGET_64BIT)
14673 fputs ("@tpoff", file);
14674 else
14675 fputs ("@ntpoff", file);
14676 break;
14677 case UNSPEC_DTPOFF:
14678 fputs ("@dtpoff", file);
14679 break;
14680 case UNSPEC_GOTNTPOFF:
14681 if (TARGET_64BIT)
14682 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14683 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14684 else
14685 fputs ("@gotntpoff", file);
14686 break;
14687 case UNSPEC_INDNTPOFF:
14688 fputs ("@indntpoff", file);
14689 break;
14690 #if TARGET_MACHO
14691 case UNSPEC_MACHOPIC_OFFSET:
14692 putc ('-', file);
14693 machopic_output_function_base_name (file);
14694 break;
14695 #endif
14696 default:
14697 output_operand_lossage ("invalid UNSPEC as operand");
14698 break;
14700 break;
14702 default:
14703 output_operand_lossage ("invalid expression as operand");
14707 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14708 We need to emit DTP-relative relocations. */
14710 static void ATTRIBUTE_UNUSED
14711 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14713 fputs (ASM_LONG, file);
14714 output_addr_const (file, x);
14715 fputs ("@dtpoff", file);
14716 switch (size)
14718 case 4:
14719 break;
14720 case 8:
14721 fputs (", 0", file);
14722 break;
14723 default:
14724 gcc_unreachable ();
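  /* Typically this emits ".long sym@dtpoff" for a 4-byte request and
     ".long sym@dtpoff, 0" for an 8-byte one, padding the upper half
     with zero.  */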
14728 /* Return true if X is a representation of the PIC register. This copes
14729 with calls from ix86_find_base_term, where the register might have
14730 been replaced by a cselib value. */
14732 static bool
14733 ix86_pic_register_p (rtx x)
14735 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14736 return (pic_offset_table_rtx
14737 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14738 else if (!REG_P (x))
14739 return false;
14740 else if (pic_offset_table_rtx)
14742 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14743 return true;
14744 if (HARD_REGISTER_P (x)
14745 && !HARD_REGISTER_P (pic_offset_table_rtx)
14746 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14747 return true;
14748 return false;
14750 else
14751 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14754 /* Helper function for ix86_delegitimize_address.
14755 Attempt to delegitimize TLS local-exec accesses. */
14757 static rtx
14758 ix86_delegitimize_tls_address (rtx orig_x)
14760 rtx x = orig_x, unspec;
14761 struct ix86_address addr;
14763 if (!TARGET_TLS_DIRECT_SEG_REFS)
14764 return orig_x;
14765 if (MEM_P (x))
14766 x = XEXP (x, 0);
14767 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14768 return orig_x;
14769 if (ix86_decompose_address (x, &addr) == 0
14770 || addr.seg != DEFAULT_TLS_SEG_REG
14771 || addr.disp == NULL_RTX
14772 || GET_CODE (addr.disp) != CONST)
14773 return orig_x;
14774 unspec = XEXP (addr.disp, 0);
14775 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14776 unspec = XEXP (unspec, 0);
14777 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14778 return orig_x;
14779 x = XVECEXP (unspec, 0, 0);
14780 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14781 if (unspec != XEXP (addr.disp, 0))
14782 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14783 if (addr.index)
14785 rtx idx = addr.index;
14786 if (addr.scale != 1)
14787 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14788 x = gen_rtx_PLUS (Pmode, idx, x);
14790 if (addr.base)
14791 x = gen_rtx_PLUS (Pmode, addr.base, x);
14792 if (MEM_P (orig_x))
14793 x = replace_equiv_address_nv (orig_x, x);
14794 return x;
14797 /* In the name of slightly smaller debug output, and to cater to
14798 general assembler lossage, recognize PIC+GOTOFF and turn it back
14799 into a direct symbol reference.
14801 On Darwin, this is necessary to avoid a crash, because Darwin
14802 has a different PIC label for each routine but the DWARF debugging
14803 information is not associated with any particular routine, so it's
14804 necessary to remove references to the PIC label from RTL stored by
14805 the DWARF output code. */
14807 static rtx
14808 ix86_delegitimize_address (rtx x)
14810 rtx orig_x = delegitimize_mem_from_attrs (x);
14811 /* addend is NULL or some rtx if x is something+GOTOFF where
14812 something doesn't include the PIC register. */
14813 rtx addend = NULL_RTX;
14814 /* reg_addend is NULL or a multiple of some register. */
14815 rtx reg_addend = NULL_RTX;
14816 /* const_addend is NULL or a const_int. */
14817 rtx const_addend = NULL_RTX;
14818 /* This is the result, or NULL. */
14819 rtx result = NULL_RTX;
14821 x = orig_x;
14823 if (MEM_P (x))
14824 x = XEXP (x, 0);
14826 if (TARGET_64BIT)
14828 if (GET_CODE (x) == CONST
14829 && GET_CODE (XEXP (x, 0)) == PLUS
14830 && GET_MODE (XEXP (x, 0)) == Pmode
14831 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14832 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14833 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14835 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14836 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14837 if (MEM_P (orig_x))
14838 x = replace_equiv_address_nv (orig_x, x);
14839 return x;
14842 if (GET_CODE (x) == CONST
14843 && GET_CODE (XEXP (x, 0)) == UNSPEC
14844 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14845 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14846 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14848 x = XVECEXP (XEXP (x, 0), 0, 0);
14849 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14851 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14852 GET_MODE (x), 0);
14853 if (x == NULL_RTX)
14854 return orig_x;
14856 return x;
14859 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14860 return ix86_delegitimize_tls_address (orig_x);
14862 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14863 and -mcmodel=medium -fpic. */
14866 if (GET_CODE (x) != PLUS
14867 || GET_CODE (XEXP (x, 1)) != CONST)
14868 return ix86_delegitimize_tls_address (orig_x);
14870 if (ix86_pic_register_p (XEXP (x, 0)))
14871 /* %ebx + GOT/GOTOFF */
14873 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14875 /* %ebx + %reg * scale + GOT/GOTOFF */
14876 reg_addend = XEXP (x, 0);
14877 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14878 reg_addend = XEXP (reg_addend, 1);
14879 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14880 reg_addend = XEXP (reg_addend, 0);
14881 else
14883 reg_addend = NULL_RTX;
14884 addend = XEXP (x, 0);
14887 else
14888 addend = XEXP (x, 0);
14890 x = XEXP (XEXP (x, 1), 0);
14891 if (GET_CODE (x) == PLUS
14892 && CONST_INT_P (XEXP (x, 1)))
14894 const_addend = XEXP (x, 1);
14895 x = XEXP (x, 0);
14898 if (GET_CODE (x) == UNSPEC
14899 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14900 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14901 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14902 && !MEM_P (orig_x) && !addend)))
14903 result = XVECEXP (x, 0, 0);
14905 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14906 && !MEM_P (orig_x))
14907 result = XVECEXP (x, 0, 0);
14909 if (! result)
14910 return ix86_delegitimize_tls_address (orig_x);
14912 if (const_addend)
14913 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14914 if (reg_addend)
14915 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14916 if (addend)
14918 /* If the rest of original X doesn't involve the PIC register, add
14919 addend and subtract pic_offset_table_rtx. This can happen e.g.
14920 for code like:
14921 leal (%ebx, %ecx, 4), %ecx
14923 movl foo@GOTOFF(%ecx), %edx
14924 in which case we return (%ecx - %ebx) + foo
14925 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14926 and reload has completed. */
14927 if (pic_offset_table_rtx
14928 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14929 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14930 pic_offset_table_rtx),
14931 result);
14932 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14934 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14935 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14936 result = gen_rtx_PLUS (Pmode, tmp, result);
14938 else
14939 return orig_x;
14941 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14943 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14944 if (result == NULL_RTX)
14945 return orig_x;
14947 return result;
14950 /* If X is a machine specific address (i.e. a symbol or label being
14951 referenced as a displacement from the GOT implemented using an
14952 UNSPEC), then return the base term. Otherwise return X. */
14955 ix86_find_base_term (rtx x)
14957 rtx term;
14959 if (TARGET_64BIT)
14961 if (GET_CODE (x) != CONST)
14962 return x;
14963 term = XEXP (x, 0);
14964 if (GET_CODE (term) == PLUS
14965 && CONST_INT_P (XEXP (term, 1)))
14966 term = XEXP (term, 0);
14967 if (GET_CODE (term) != UNSPEC
14968 || (XINT (term, 1) != UNSPEC_GOTPCREL
14969 && XINT (term, 1) != UNSPEC_PCREL))
14970 return x;
14972 return XVECEXP (term, 0, 0);
14975 return ix86_delegitimize_address (x);
14978 static void
14979 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
14980 bool fp, FILE *file)
14982 const char *suffix;
14984 if (mode == CCFPmode || mode == CCFPUmode)
14986 code = ix86_fp_compare_code_to_integer (code);
14987 mode = CCmode;
14989 if (reverse)
14990 code = reverse_condition (code);
14992 switch (code)
14994 case EQ:
14995 switch (mode)
14997 case CCAmode:
14998 suffix = "a";
14999 break;
15001 case CCCmode:
15002 suffix = "c";
15003 break;
15005 case CCOmode:
15006 suffix = "o";
15007 break;
15009 case CCSmode:
15010 suffix = "s";
15011 break;
15013 default:
15014 suffix = "e";
15016 break;
15017 case NE:
15018 switch (mode)
15020 case CCAmode:
15021 suffix = "na";
15022 break;
15024 case CCCmode:
15025 suffix = "nc";
15026 break;
15028 case CCOmode:
15029 suffix = "no";
15030 break;
15032 case CCSmode:
15033 suffix = "ns";
15034 break;
15036 default:
15037 suffix = "ne";
15039 break;
15040 case GT:
15041 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15042 suffix = "g";
15043 break;
15044 case GTU:
15045 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15046 Those same assemblers have the same but opposite lossage on cmov. */
15047 if (mode == CCmode)
15048 suffix = fp ? "nbe" : "a";
15049 else
15050 gcc_unreachable ();
15051 break;
15052 case LT:
15053 switch (mode)
15055 case CCNOmode:
15056 case CCGOCmode:
15057 suffix = "s";
15058 break;
15060 case CCmode:
15061 case CCGCmode:
15062 suffix = "l";
15063 break;
15065 default:
15066 gcc_unreachable ();
15068 break;
15069 case LTU:
15070 if (mode == CCmode)
15071 suffix = "b";
15072 else if (mode == CCCmode)
15073 suffix = fp ? "b" : "c";
15074 else
15075 gcc_unreachable ();
15076 break;
15077 case GE:
15078 switch (mode)
15080 case CCNOmode:
15081 case CCGOCmode:
15082 suffix = "ns";
15083 break;
15085 case CCmode:
15086 case CCGCmode:
15087 suffix = "ge";
15088 break;
15090 default:
15091 gcc_unreachable ();
15093 break;
15094 case GEU:
15095 if (mode == CCmode)
15096 suffix = "nb";
15097 else if (mode == CCCmode)
15098 suffix = fp ? "nb" : "nc";
15099 else
15100 gcc_unreachable ();
15101 break;
15102 case LE:
15103 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15104 suffix = "le";
15105 break;
15106 case LEU:
15107 if (mode == CCmode)
15108 suffix = "be";
15109 else
15110 gcc_unreachable ();
15111 break;
15112 case UNORDERED:
15113 suffix = fp ? "u" : "p";
15114 break;
15115 case ORDERED:
15116 suffix = fp ? "nu" : "np";
15117 break;
15118 default:
15119 gcc_unreachable ();
15121 fputs (suffix, file);
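  /* For example, EQ in plain CCmode yields "e" (or "ne" when REVERSE is
     set), so callers printing a set/cmov mnemonic end up with sete/setne
     or cmove/cmovne.  */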
15124 /* Print the name of register X to FILE based on its machine mode and number.
15125 If CODE is 'w', pretend the mode is HImode.
15126 If CODE is 'b', pretend the mode is QImode.
15127 If CODE is 'k', pretend the mode is SImode.
15128 If CODE is 'q', pretend the mode is DImode.
15129 If CODE is 'x', pretend the mode is V4SFmode.
15130 If CODE is 't', pretend the mode is V8SFmode.
15131 If CODE is 'g', pretend the mode is V16SFmode.
15132 If CODE is 'h', pretend the reg is the 'high' byte register.
15133 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
15134 If CODE is 'd', duplicate the operand for AVX instruction.
15137 void
15138 print_reg (rtx x, int code, FILE *file)
15140 const char *reg;
15141 int msize;
15142 unsigned int regno;
15143 bool duplicated;
15145 if (ASSEMBLER_DIALECT == ASM_ATT)
15146 putc ('%', file);
15148 if (x == pc_rtx)
15150 gcc_assert (TARGET_64BIT);
15151 fputs ("rip", file);
15152 return;
15155 if (code == 'y' && STACK_TOP_P (x))
15157 fputs ("st(0)", file);
15158 return;
15161 if (code == 'w')
15162 msize = 2;
15163 else if (code == 'b')
15164 msize = 1;
15165 else if (code == 'k')
15166 msize = 4;
15167 else if (code == 'q')
15168 msize = 8;
15169 else if (code == 'h')
15170 msize = 0;
15171 else if (code == 'x')
15172 msize = 16;
15173 else if (code == 't')
15174 msize = 32;
15175 else if (code == 'g')
15176 msize = 64;
15177 else
15178 msize = GET_MODE_SIZE (GET_MODE (x));
15180 regno = true_regnum (x);
15182 gcc_assert (regno != ARG_POINTER_REGNUM
15183 && regno != FRAME_POINTER_REGNUM
15184 && regno != FLAGS_REG
15185 && regno != FPSR_REG
15186 && regno != FPCR_REG);
15188 duplicated = code == 'd' && TARGET_AVX;
15190 switch (msize)
15192 case 8:
15193 case 4:
15194 if (LEGACY_INT_REGNO_P (regno))
15195 putc (msize == 8 ? 'r' : 'e', file);
15196 case 16:
15197 case 12:
15198 case 2:
15199 normal:
15200 reg = hi_reg_name[regno];
15201 break;
15202 case 1:
15203 if (regno >= ARRAY_SIZE (qi_reg_name))
15204 goto normal;
15205 reg = qi_reg_name[regno];
15206 break;
15207 case 0:
15208 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15209 goto normal;
15210 reg = qi_high_reg_name[regno];
15211 break;
15212 case 32:
15213 case 64:
15214 if (SSE_REGNO_P (regno))
15216 gcc_assert (!duplicated);
15217 putc (msize == 32 ? 'y' : 'z', file);
15218 reg = hi_reg_name[regno] + 1;
15219 break;
15221 goto normal;
15222 default:
15223 gcc_unreachable ();
15226 fputs (reg, file);
15228 /* Irritatingly, AMD extended registers use a
15229 different naming convention: "r%d[bwd]".  */
15230 if (REX_INT_REGNO_P (regno))
15232 gcc_assert (TARGET_64BIT);
15233 switch (msize)
15235 case 0:
15236 error ("extended registers have no high halves");
15237 break;
15238 case 1:
15239 putc ('b', file);
15240 break;
15241 case 2:
15242 putc ('w', file);
15243 break;
15244 case 4:
15245 putc ('d', file);
15246 break;
15247 case 8:
15248 /* no suffix */
15249 break;
15250 default:
15251 error ("unsupported operand size for extended register");
15252 break;
15254 return;
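  /* So the 4-byte form of r10 prints as r10d, the 2-byte form as r10w,
     the byte form as r10b, and the full 8-byte register as plain r10.  */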
15257 if (duplicated)
15259 if (ASSEMBLER_DIALECT == ASM_ATT)
15260 fprintf (file, ", %%%s", reg);
15261 else
15262 fprintf (file, ", %s", reg);
15266 /* Meaning of CODE:
15267 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15268 C -- print opcode suffix for set/cmov insn.
15269 c -- like C, but print reversed condition
15270 F,f -- likewise, but for floating-point.
15271 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15272 otherwise nothing
15273 R -- print embedded rounding and sae.
15274 r -- print only sae.
15275 z -- print the opcode suffix for the size of the current operand.
15276 Z -- likewise, with special suffixes for x87 instructions.
15277 * -- print a star (in certain assembler syntax)
15278 A -- print an absolute memory reference.
15279 E -- print address with DImode register names if TARGET_64BIT.
15280 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15281 s -- print a shift double count, followed by the assembler's argument
15282 delimiter.
15283 b -- print the QImode name of the register for the indicated operand.
15284 %b0 would print %al if operands[0] is reg 0.
15285 w -- likewise, print the HImode name of the register.
15286 k -- likewise, print the SImode name of the register.
15287 q -- likewise, print the DImode name of the register.
15288 x -- likewise, print the V4SFmode name of the register.
15289 t -- likewise, print the V8SFmode name of the register.
15290 g -- likewise, print the V16SFmode name of the register.
15291 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15292 y -- print "st(0)" instead of "st" as a register.
15293 d -- print duplicated register operand for AVX instruction.
15294 D -- print condition for SSE cmp instruction.
15295 P -- if PIC, print an @PLT suffix.
15296 p -- print raw symbol name.
15297 X -- don't print any sort of PIC '@' suffix for a symbol.
15298 & -- print some in-use local-dynamic symbol name.
15299 H -- print a memory address offset by 8; used for sse high-parts
15300 Y -- print condition for XOP pcom* instruction.
15301 + -- print a branch hint as 'cs' or 'ds' prefix
15302 ; -- print a semicolon (after prefixes, due to a bug in older gas).
15303 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15304 @ -- print a segment register of thread base pointer load
15305 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15306 ! -- print MPX prefix for jxx/call/ret instructions if required.
15309 void
15310 ix86_print_operand (FILE *file, rtx x, int code)
15312 if (code)
15314 switch (code)
15316 case 'A':
15317 switch (ASSEMBLER_DIALECT)
15319 case ASM_ATT:
15320 putc ('*', file);
15321 break;
15323 case ASM_INTEL:
15324 /* Intel syntax. For absolute addresses, registers should not
15325 be surrounded by braces. */
15326 if (!REG_P (x))
15328 putc ('[', file);
15329 ix86_print_operand (file, x, 0);
15330 putc (']', file);
15331 return;
15333 break;
15335 default:
15336 gcc_unreachable ();
15339 ix86_print_operand (file, x, 0);
15340 return;
15342 case 'E':
15343 /* Wrap address in an UNSPEC to declare special handling. */
15344 if (TARGET_64BIT)
15345 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15347 output_address (x);
15348 return;
15350 case 'L':
15351 if (ASSEMBLER_DIALECT == ASM_ATT)
15352 putc ('l', file);
15353 return;
15355 case 'W':
15356 if (ASSEMBLER_DIALECT == ASM_ATT)
15357 putc ('w', file);
15358 return;
15360 case 'B':
15361 if (ASSEMBLER_DIALECT == ASM_ATT)
15362 putc ('b', file);
15363 return;
15365 case 'Q':
15366 if (ASSEMBLER_DIALECT == ASM_ATT)
15367 putc ('l', file);
15368 return;
15370 case 'S':
15371 if (ASSEMBLER_DIALECT == ASM_ATT)
15372 putc ('s', file);
15373 return;
15375 case 'T':
15376 if (ASSEMBLER_DIALECT == ASM_ATT)
15377 putc ('t', file);
15378 return;
15380 case 'O':
15381 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15382 if (ASSEMBLER_DIALECT != ASM_ATT)
15383 return;
15385 switch (GET_MODE_SIZE (GET_MODE (x)))
15387 case 2:
15388 putc ('w', file);
15389 break;
15391 case 4:
15392 putc ('l', file);
15393 break;
15395 case 8:
15396 putc ('q', file);
15397 break;
15399 default:
15400 output_operand_lossage
15401 ("invalid operand size for operand code 'O'");
15402 return;
15405 putc ('.', file);
15406 #endif
15407 return;
15409 case 'z':
15410 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15412 /* Opcodes don't get size suffixes if using Intel opcodes. */
15413 if (ASSEMBLER_DIALECT == ASM_INTEL)
15414 return;
15416 switch (GET_MODE_SIZE (GET_MODE (x)))
15418 case 1:
15419 putc ('b', file);
15420 return;
15422 case 2:
15423 putc ('w', file);
15424 return;
15426 case 4:
15427 putc ('l', file);
15428 return;
15430 case 8:
15431 putc ('q', file);
15432 return;
15434 default:
15435 output_operand_lossage
15436 ("invalid operand size for operand code 'z'");
15437 return;
15441 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15442 warning
15443 (0, "non-integer operand used with operand code 'z'");
15444 /* FALLTHRU */
15446 case 'Z':
15447 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15448 if (ASSEMBLER_DIALECT == ASM_INTEL)
15449 return;
15451 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15453 switch (GET_MODE_SIZE (GET_MODE (x)))
15455 case 2:
15456 #ifdef HAVE_AS_IX86_FILDS
15457 putc ('s', file);
15458 #endif
15459 return;
15461 case 4:
15462 putc ('l', file);
15463 return;
15465 case 8:
15466 #ifdef HAVE_AS_IX86_FILDQ
15467 putc ('q', file);
15468 #else
15469 fputs ("ll", file);
15470 #endif
15471 return;
15473 default:
15474 break;
15477 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15479 /* 387 opcodes don't get size suffixes
15480 if the operands are registers. */
15481 if (STACK_REG_P (x))
15482 return;
15484 switch (GET_MODE_SIZE (GET_MODE (x)))
15486 case 4:
15487 putc ('s', file);
15488 return;
15490 case 8:
15491 putc ('l', file);
15492 return;
15494 case 12:
15495 case 16:
15496 putc ('t', file);
15497 return;
15499 default:
15500 break;
15503 else
15505 output_operand_lossage
15506 ("invalid operand type used with operand code 'Z'");
15507 return;
15510 output_operand_lossage
15511 ("invalid operand size for operand code 'Z'");
15512 return;
15514 case 'd':
15515 case 'b':
15516 case 'w':
15517 case 'k':
15518 case 'q':
15519 case 'h':
15520 case 't':
15521 case 'g':
15522 case 'y':
15523 case 'x':
15524 case 'X':
15525 case 'P':
15526 case 'p':
15527 break;
15529 case 's':
15530 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15532 ix86_print_operand (file, x, 0);
15533 fputs (", ", file);
15535 return;
15537 case 'Y':
15538 switch (GET_CODE (x))
15540 case NE:
15541 fputs ("neq", file);
15542 break;
15543 case EQ:
15544 fputs ("eq", file);
15545 break;
15546 case GE:
15547 case GEU:
15548 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15549 break;
15550 case GT:
15551 case GTU:
15552 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15553 break;
15554 case LE:
15555 case LEU:
15556 fputs ("le", file);
15557 break;
15558 case LT:
15559 case LTU:
15560 fputs ("lt", file);
15561 break;
15562 case UNORDERED:
15563 fputs ("unord", file);
15564 break;
15565 case ORDERED:
15566 fputs ("ord", file);
15567 break;
15568 case UNEQ:
15569 fputs ("ueq", file);
15570 break;
15571 case UNGE:
15572 fputs ("nlt", file);
15573 break;
15574 case UNGT:
15575 fputs ("nle", file);
15576 break;
15577 case UNLE:
15578 fputs ("ule", file);
15579 break;
15580 case UNLT:
15581 fputs ("ult", file);
15582 break;
15583 case LTGT:
15584 fputs ("une", file);
15585 break;
15586 default:
15587 output_operand_lossage ("operand is not a condition code, "
15588 "invalid operand code 'Y'");
15589 return;
15591 return;
15593 case 'D':
15594 /* Little bit of braindamage here. The SSE compare instructions
15595 use completely different names for the comparisons than the
15596 fp conditional moves do. */
15597 switch (GET_CODE (x))
15599 case UNEQ:
15600 if (TARGET_AVX)
15602 fputs ("eq_us", file);
15603 break;
15605 case EQ:
15606 fputs ("eq", file);
15607 break;
15608 case UNLT:
15609 if (TARGET_AVX)
15611 fputs ("nge", file);
15612 break;
15614 case LT:
15615 fputs ("lt", file);
15616 break;
15617 case UNLE:
15618 if (TARGET_AVX)
15620 fputs ("ngt", file);
15621 break;
15623 case LE:
15624 fputs ("le", file);
15625 break;
15626 case UNORDERED:
15627 fputs ("unord", file);
15628 break;
15629 case LTGT:
15630 if (TARGET_AVX)
15632 fputs ("neq_oq", file);
15633 break;
15635 case NE:
15636 fputs ("neq", file);
15637 break;
15638 case GE:
15639 if (TARGET_AVX)
15641 fputs ("ge", file);
15642 break;
15644 case UNGE:
15645 fputs ("nlt", file);
15646 break;
15647 case GT:
15648 if (TARGET_AVX)
15650 fputs ("gt", file);
15651 break;
15653 case UNGT:
15654 fputs ("nle", file);
15655 break;
15656 case ORDERED:
15657 fputs ("ord", file);
15658 break;
15659 default:
15660 output_operand_lossage ("operand is not a condition code, "
15661 "invalid operand code 'D'");
15662 return;
15664 return;
15666 case 'F':
15667 case 'f':
15668 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15669 if (ASSEMBLER_DIALECT == ASM_ATT)
15670 putc ('.', file);
15671 #endif
15673 case 'C':
15674 case 'c':
15675 if (!COMPARISON_P (x))
15677 output_operand_lossage ("operand is not a condition code, "
15678 "invalid operand code '%c'", code);
15679 return;
15681 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15682 code == 'c' || code == 'f',
15683 code == 'F' || code == 'f',
15684 file);
15685 return;
15687 case 'H':
15688 if (!offsettable_memref_p (x))
15690 output_operand_lossage ("operand is not an offsettable memory "
15691 "reference, invalid operand code 'H'");
15692 return;
15694 /* It doesn't actually matter what mode we use here, as we're
15695 only going to use this for printing. */
15696 x = adjust_address_nv (x, DImode, 8);
15697 /* Output 'qword ptr' for intel assembler dialect. */
15698 if (ASSEMBLER_DIALECT == ASM_INTEL)
15699 code = 'q';
15700 break;
15702 case 'K':
15703 gcc_assert (CONST_INT_P (x));
15705 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15706 #ifdef HAVE_AS_IX86_HLE
15707 fputs ("xacquire ", file);
15708 #else
15709 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15710 #endif
15711 else if (INTVAL (x) & IX86_HLE_RELEASE)
15712 #ifdef HAVE_AS_IX86_HLE
15713 fputs ("xrelease ", file);
15714 #else
15715 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15716 #endif
15717 /* We do not want to print the value of the operand. */
15718 return;
15720 case 'N':
15721 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15722 fputs ("{z}", file);
15723 return;
15725 case 'r':
15726 gcc_assert (CONST_INT_P (x));
15727 gcc_assert (INTVAL (x) == ROUND_SAE);
15729 if (ASSEMBLER_DIALECT == ASM_INTEL)
15730 fputs (", ", file);
15732 fputs ("{sae}", file);
15734 if (ASSEMBLER_DIALECT == ASM_ATT)
15735 fputs (", ", file);
15737 return;
15739 case 'R':
15740 gcc_assert (CONST_INT_P (x));
15742 if (ASSEMBLER_DIALECT == ASM_INTEL)
15743 fputs (", ", file);
15745 switch (INTVAL (x))
15747 case ROUND_NEAREST_INT | ROUND_SAE:
15748 fputs ("{rn-sae}", file);
15749 break;
15750 case ROUND_NEG_INF | ROUND_SAE:
15751 fputs ("{rd-sae}", file);
15752 break;
15753 case ROUND_POS_INF | ROUND_SAE:
15754 fputs ("{ru-sae}", file);
15755 break;
15756 case ROUND_ZERO | ROUND_SAE:
15757 fputs ("{rz-sae}", file);
15758 break;
15759 default:
15760 gcc_unreachable ();
15763 if (ASSEMBLER_DIALECT == ASM_ATT)
15764 fputs (", ", file);
15766 return;
15768 case '*':
15769 if (ASSEMBLER_DIALECT == ASM_ATT)
15770 putc ('*', file);
15771 return;
15773 case '&':
15775 const char *name = get_some_local_dynamic_name ();
15776 if (name == NULL)
15777 output_operand_lossage ("'%%&' used without any "
15778 "local dynamic TLS references");
15779 else
15780 assemble_name (file, name);
15781 return;
15784 case '+':
15786 rtx x;
15788 if (!optimize
15789 || optimize_function_for_size_p (cfun)
15790 || !TARGET_BRANCH_PREDICTION_HINTS)
15791 return;
15793 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15794 if (x)
15796 int pred_val = XINT (x, 0);
15798 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15799 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15801 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15802 bool cputaken
15803 = final_forward_branch_p (current_output_insn) == 0;
15805 /* Emit hints only in the case where the default branch prediction
15806 heuristics would fail. */
15807 if (taken != cputaken)
15809 /* We use 3e (DS) prefix for taken branches and
15810 2e (CS) prefix for not taken branches. */
15811 if (taken)
15812 fputs ("ds ; ", file);
15813 else
15814 fputs ("cs ; ", file);
15818 return;
15821 case ';':
15822 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15823 putc (';', file);
15824 #endif
15825 return;
15827 case '@':
15828 if (ASSEMBLER_DIALECT == ASM_ATT)
15829 putc ('%', file);
15831 /* The kernel uses a different segment register for performance
15832 reasons; a system call would not have to trash the userspace
15833 segment register, which would be expensive. */
15834 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15835 fputs ("fs", file);
15836 else
15837 fputs ("gs", file);
15838 return;
15840 case '~':
15841 putc (TARGET_AVX2 ? 'i' : 'f', file);
15842 return;
15844 case '^':
15845 if (TARGET_64BIT && Pmode != word_mode)
15846 fputs ("addr32 ", file);
15847 return;
15849 case '!':
15850 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15851 fputs ("bnd ", file);
15852 return;
15854 default:
15855 output_operand_lossage ("invalid operand code '%c'", code);
15859 if (REG_P (x))
15860 print_reg (x, code, file);
15862 else if (MEM_P (x))
15864 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15865 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15866 && GET_MODE (x) != BLKmode)
15868 const char * size;
15869 switch (GET_MODE_SIZE (GET_MODE (x)))
15871 case 1: size = "BYTE"; break;
15872 case 2: size = "WORD"; break;
15873 case 4: size = "DWORD"; break;
15874 case 8: size = "QWORD"; break;
15875 case 12: size = "TBYTE"; break;
15876 case 16:
15877 if (GET_MODE (x) == XFmode)
15878 size = "TBYTE";
15879 else
15880 size = "XMMWORD";
15881 break;
15882 case 32: size = "YMMWORD"; break;
15883 case 64: size = "ZMMWORD"; break;
15884 default:
15885 gcc_unreachable ();
15888 /* Check for explicit size override (codes 'b', 'w', 'k',
15889 'q' and 'x') */
15890 if (code == 'b')
15891 size = "BYTE";
15892 else if (code == 'w')
15893 size = "WORD";
15894 else if (code == 'k')
15895 size = "DWORD";
15896 else if (code == 'q')
15897 size = "QWORD";
15898 else if (code == 'x')
15899 size = "XMMWORD";
15901 fputs (size, file);
15902 fputs (" PTR ", file);
15905 x = XEXP (x, 0);
15906 /* Avoid (%rip) for call operands. */
15907 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15908 && !CONST_INT_P (x))
15909 output_addr_const (file, x);
15910 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15911 output_operand_lossage ("invalid constraints for operand");
15912 else
15913 output_address (x);
15916 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
15918 REAL_VALUE_TYPE r;
15919 long l;
15921 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15922 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15924 if (ASSEMBLER_DIALECT == ASM_ATT)
15925 putc ('$', file);
15926 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15927 if (code == 'q')
15928 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15929 (unsigned long long) (int) l);
15930 else
15931 fprintf (file, "0x%08x", (unsigned int) l);
15934 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
15936 REAL_VALUE_TYPE r;
15937 long l[2];
15939 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15940 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15942 if (ASSEMBLER_DIALECT == ASM_ATT)
15943 putc ('$', file);
15944 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15947 /* These float cases don't actually occur as immediate operands. */
15948 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
15950 char dstr[30];
15952 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15953 fputs (dstr, file);
15956 else
15958 /* We have patterns that allow zero sets of memory, for instance.
15959 In 64-bit mode, we should probably support all 8-byte vectors,
15960 since we can in fact encode that into an immediate. */
15961 if (GET_CODE (x) == CONST_VECTOR)
15963 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15964 x = const0_rtx;
15967 if (code != 'P' && code != 'p')
15969 if (CONST_INT_P (x))
15971 if (ASSEMBLER_DIALECT == ASM_ATT)
15972 putc ('$', file);
15974 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15975 || GET_CODE (x) == LABEL_REF)
15977 if (ASSEMBLER_DIALECT == ASM_ATT)
15978 putc ('$', file);
15979 else
15980 fputs ("OFFSET FLAT:", file);
15983 if (CONST_INT_P (x))
15984 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15985 else if (flag_pic || MACHOPIC_INDIRECT)
15986 output_pic_addr_const (file, x, code);
15987 else
15988 output_addr_const (file, x);
15992 static bool
15993 ix86_print_operand_punct_valid_p (unsigned char code)
15995 return (code == '@' || code == '*' || code == '+' || code == '&'
15996 || code == ';' || code == '~' || code == '^' || code == '!');
15999 /* Print a memory operand whose address is ADDR. */
16001 static void
16002 ix86_print_operand_address (FILE *file, rtx addr)
16004 struct ix86_address parts;
16005 rtx base, index, disp;
16006 int scale;
16007 int ok;
16008 bool vsib = false;
16009 int code = 0;
16011 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16013 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16014 gcc_assert (parts.index == NULL_RTX);
16015 parts.index = XVECEXP (addr, 0, 1);
16016 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16017 addr = XVECEXP (addr, 0, 0);
16018 vsib = true;
16020 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16022 gcc_assert (TARGET_64BIT);
16023 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16024 code = 'q';
16026 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16028 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16029 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16030 if (parts.base != NULL_RTX)
16032 parts.index = parts.base;
16033 parts.scale = 1;
16035 parts.base = XVECEXP (addr, 0, 0);
16036 addr = XVECEXP (addr, 0, 0);
16038 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16040 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16041 gcc_assert (parts.index == NULL_RTX);
16042 parts.index = XVECEXP (addr, 0, 1);
16043 addr = XVECEXP (addr, 0, 0);
16045 else
16046 ok = ix86_decompose_address (addr, &parts);
16048 gcc_assert (ok);
16050 base = parts.base;
16051 index = parts.index;
16052 disp = parts.disp;
16053 scale = parts.scale;
16055 switch (parts.seg)
16057 case SEG_DEFAULT:
16058 break;
16059 case SEG_FS:
16060 case SEG_GS:
16061 if (ASSEMBLER_DIALECT == ASM_ATT)
16062 putc ('%', file);
16063 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16064 break;
16065 default:
16066 gcc_unreachable ();
16069 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16070 if (TARGET_64BIT && !base && !index)
16072 rtx symbol = disp;
16074 if (GET_CODE (disp) == CONST
16075 && GET_CODE (XEXP (disp, 0)) == PLUS
16076 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16077 symbol = XEXP (XEXP (disp, 0), 0);
16079 if (GET_CODE (symbol) == LABEL_REF
16080 || (GET_CODE (symbol) == SYMBOL_REF
16081 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16082 base = pc_rtx;
16084 if (!base && !index)
16086 /* Displacement only requires special attention. */
16088 if (CONST_INT_P (disp))
16090 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16091 fputs ("ds:", file);
16092 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16094 else if (flag_pic)
16095 output_pic_addr_const (file, disp, 0);
16096 else
16097 output_addr_const (file, disp);
16099 else
16101 /* Print SImode register names to force addr32 prefix. */
16102 if (SImode_address_operand (addr, VOIDmode))
16104 #ifdef ENABLE_CHECKING
16105 gcc_assert (TARGET_64BIT);
16106 switch (GET_CODE (addr))
16108 case SUBREG:
16109 gcc_assert (GET_MODE (addr) == SImode);
16110 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16111 break;
16112 case ZERO_EXTEND:
16113 case AND:
16114 gcc_assert (GET_MODE (addr) == DImode);
16115 break;
16116 default:
16117 gcc_unreachable ();
16119 #endif
16120 gcc_assert (!code);
16121 code = 'k';
16123 else if (code == 0
16124 && TARGET_X32
16125 && disp
16126 && CONST_INT_P (disp)
16127 && INTVAL (disp) < -16*1024*1024)
16129 /* X32 runs in 64-bit mode, where displacement, DISP, in
16130 address DISP(%r64), is encoded as 32-bit immediate sign-
16131 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16132 address is %r64 + 0xffffffffbffffd00. When %r64 <
16133 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16134 which is invalid for x32. The correct address is %r64
16135 - 0x40000300 == 0xf7ffdd64. To properly encode
16136 -0x40000300(%r64) for x32, we zero-extend negative
16137 displacement by forcing addr32 prefix which truncates
16138 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16139 zero-extend all negative displacements, including -1(%rsp).
16140 However, for small negative displacements, sign-extension
16141 won't cause overflow. We only zero-extend negative
16142 displacements if they are less than -16*1024*1024, which is also used
16143 to check legitimate address displacements for PIC. */
16144 code = 'k';
16147 if (ASSEMBLER_DIALECT == ASM_ATT)
16149 if (disp)
16151 if (flag_pic)
16152 output_pic_addr_const (file, disp, 0);
16153 else if (GET_CODE (disp) == LABEL_REF)
16154 output_asm_label (disp);
16155 else
16156 output_addr_const (file, disp);
16159 putc ('(', file);
16160 if (base)
16161 print_reg (base, code, file);
16162 if (index)
16164 putc (',', file);
16165 print_reg (index, vsib ? 0 : code, file);
16166 if (scale != 1 || vsib)
16167 fprintf (file, ",%d", scale);
16169 putc (')', file);
16171 else
16173 rtx offset = NULL_RTX;
16175 if (disp)
16177 /* Pull out the offset of a symbol; print any symbol itself. */
16178 if (GET_CODE (disp) == CONST
16179 && GET_CODE (XEXP (disp, 0)) == PLUS
16180 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16182 offset = XEXP (XEXP (disp, 0), 1);
16183 disp = gen_rtx_CONST (VOIDmode,
16184 XEXP (XEXP (disp, 0), 0));
16187 if (flag_pic)
16188 output_pic_addr_const (file, disp, 0);
16189 else if (GET_CODE (disp) == LABEL_REF)
16190 output_asm_label (disp);
16191 else if (CONST_INT_P (disp))
16192 offset = disp;
16193 else
16194 output_addr_const (file, disp);
16197 putc ('[', file);
16198 if (base)
16200 print_reg (base, code, file);
16201 if (offset)
16203 if (INTVAL (offset) >= 0)
16204 putc ('+', file);
16205 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16208 else if (offset)
16209 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16210 else
16211 putc ('0', file);
16213 if (index)
16215 putc ('+', file);
16216 print_reg (index, vsib ? 0 : code, file);
16217 if (scale != 1 || vsib)
16218 fprintf (file, "*%d", scale);
16220 putc (']', file);
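  /* E.g. the AT&T form -8(%rbp,%rax,4) printed above comes out as
     [rbp-8+rax*4] in the Intel dialect.  */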
16225 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16227 static bool
16228 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16230 rtx op;
16232 if (GET_CODE (x) != UNSPEC)
16233 return false;
16235 op = XVECEXP (x, 0, 0);
16236 switch (XINT (x, 1))
16238 case UNSPEC_GOTTPOFF:
16239 output_addr_const (file, op);
16240 /* FIXME: This might be @TPOFF in Sun ld. */
16241 fputs ("@gottpoff", file);
16242 break;
16243 case UNSPEC_TPOFF:
16244 output_addr_const (file, op);
16245 fputs ("@tpoff", file);
16246 break;
16247 case UNSPEC_NTPOFF:
16248 output_addr_const (file, op);
16249 if (TARGET_64BIT)
16250 fputs ("@tpoff", file);
16251 else
16252 fputs ("@ntpoff", file);
16253 break;
16254 case UNSPEC_DTPOFF:
16255 output_addr_const (file, op);
16256 fputs ("@dtpoff", file);
16257 break;
16258 case UNSPEC_GOTNTPOFF:
16259 output_addr_const (file, op);
16260 if (TARGET_64BIT)
16261 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16262 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16263 else
16264 fputs ("@gotntpoff", file);
16265 break;
16266 case UNSPEC_INDNTPOFF:
16267 output_addr_const (file, op);
16268 fputs ("@indntpoff", file);
16269 break;
16270 #if TARGET_MACHO
16271 case UNSPEC_MACHOPIC_OFFSET:
16272 output_addr_const (file, op);
16273 putc ('-', file);
16274 machopic_output_function_base_name (file);
16275 break;
16276 #endif
16278 case UNSPEC_STACK_CHECK:
16280 int offset;
16282 gcc_assert (flag_split_stack);
16284 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16285 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16286 #else
16287 gcc_unreachable ();
16288 #endif
16290 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16292 break;
16294 default:
16295 return false;
16298 return true;
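  /* For instance, a 64-bit UNSPEC_GOTNTPOFF operand comes out as
     "sym@gottpoff(%rip)" in AT&T syntax.  */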
16301 /* Split one or more double-mode RTL references into pairs of half-mode
16302 references. The RTL can be REG, offsettable MEM, integer constant, or
16303 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16304 split and "num" is its length. lo_half and hi_half are output arrays
16305 that parallel "operands". */
16307 void
16308 split_double_mode (machine_mode mode, rtx operands[],
16309 int num, rtx lo_half[], rtx hi_half[])
16311 machine_mode half_mode;
16312 unsigned int byte;
16314 switch (mode)
16316 case TImode:
16317 half_mode = DImode;
16318 break;
16319 case DImode:
16320 half_mode = SImode;
16321 break;
16322 default:
16323 gcc_unreachable ();
16326 byte = GET_MODE_SIZE (half_mode);
16328 while (num--)
16330 rtx op = operands[num];
16332 /* simplify_subreg refuses to split volatile memory addresses,
16333 but we still have to handle them. */
16334 if (MEM_P (op))
16336 lo_half[num] = adjust_address (op, half_mode, 0);
16337 hi_half[num] = adjust_address (op, half_mode, byte);
16339 else
16341 lo_half[num] = simplify_gen_subreg (half_mode, op,
16342 GET_MODE (op) == VOIDmode
16343 ? mode : GET_MODE (op), 0);
16344 hi_half[num] = simplify_gen_subreg (half_mode, op,
16345 GET_MODE (op) == VOIDmode
16346 ? mode : GET_MODE (op), byte);
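  /* On this little-endian target a DImode value splits into SImode halves
     at byte offsets 0 (low) and 4 (high); a TImode value splits into
     DImode halves at offsets 0 and 8.  */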
16351 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16352 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16353 is the expression of the binary operation. The output may either be
16354 emitted here, or returned to the caller, like all output_* functions.
16356 There is no guarantee that the operands are the same mode, as they
16357 might be within FLOAT or FLOAT_EXTEND expressions. */
16359 #ifndef SYSV386_COMPAT
16360 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16361 wants to fix the assemblers because that causes incompatibility
16362 with gcc. No-one wants to fix gcc because that causes
16363 incompatibility with assemblers... You can use the option of
16364 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16365 #define SYSV386_COMPAT 1
16366 #endif
16368 const char *
16369 output_387_binary_op (rtx insn, rtx *operands)
16371 static char buf[40];
16372 const char *p;
16373 const char *ssep;
16374 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16376 #ifdef ENABLE_CHECKING
16377 /* Even if we do not want to check the inputs, this documents input
16378 constraints, which helps in understanding the following code. */
16379 if (STACK_REG_P (operands[0])
16380 && ((REG_P (operands[1])
16381 && REGNO (operands[0]) == REGNO (operands[1])
16382 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16383 || (REG_P (operands[2])
16384 && REGNO (operands[0]) == REGNO (operands[2])
16385 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16386 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16387 ; /* ok */
16388 else
16389 gcc_assert (is_sse);
16390 #endif
16392 switch (GET_CODE (operands[3]))
16394 case PLUS:
16395 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16396 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16397 p = "fiadd";
16398 else
16399 p = "fadd";
16400 ssep = "vadd";
16401 break;
16403 case MINUS:
16404 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16405 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16406 p = "fisub";
16407 else
16408 p = "fsub";
16409 ssep = "vsub";
16410 break;
16412 case MULT:
16413 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16414 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16415 p = "fimul";
16416 else
16417 p = "fmul";
16418 ssep = "vmul";
16419 break;
16421 case DIV:
16422 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16423 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16424 p = "fidiv";
16425 else
16426 p = "fdiv";
16427 ssep = "vdiv";
16428 break;
16430 default:
16431 gcc_unreachable ();
16434 if (is_sse)
16436 if (TARGET_AVX)
16438 strcpy (buf, ssep);
16439 if (GET_MODE (operands[0]) == SFmode)
16440 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16441 else
16442 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16444 else
16446 strcpy (buf, ssep + 1);
16447 if (GET_MODE (operands[0]) == SFmode)
16448 strcat (buf, "ss\t{%2, %0|%0, %2}");
16449 else
16450 strcat (buf, "sd\t{%2, %0|%0, %2}");
16452 return buf;
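  /* E.g. a DFmode add yields the template "vaddsd\t{%2, %1, %0|%0, %1, %2}"
     with AVX and "addsd\t{%2, %0|%0, %2}" for plain SSE.  */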
16454 strcpy (buf, p);
16456 switch (GET_CODE (operands[3]))
16458 case MULT:
16459 case PLUS:
16460 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16461 std::swap (operands[1], operands[2]);
16463 /* We know operands[0] == operands[1]. */
16465 if (MEM_P (operands[2]))
16467 p = "%Z2\t%2";
16468 break;
16471 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16473 if (STACK_TOP_P (operands[0]))
16474 /* How is it that we are storing to a dead operand[2]?
16475 Well, presumably operands[1] is dead too. We can't
16476 store the result to st(0) as st(0) gets popped on this
16477 instruction. Instead store to operands[2] (which I
16478 think has to be st(1)). st(1) will be popped later.
16479 gcc <= 2.8.1 didn't have this check and generated
16480 assembly code that the Unixware assembler rejected. */
16481 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16482 else
16483 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16484 break;
16487 if (STACK_TOP_P (operands[0]))
16488 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16489 else
16490 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16491 break;
16493 case MINUS:
16494 case DIV:
16495 if (MEM_P (operands[1]))
16497 p = "r%Z1\t%1";
16498 break;
16501 if (MEM_P (operands[2]))
16503 p = "%Z2\t%2";
16504 break;
16507 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16509 #if SYSV386_COMPAT
16510 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16511 derived assemblers, confusingly reverse the direction of
16512 the operation for fsub{r} and fdiv{r} when the
16513 destination register is not st(0). The Intel assembler
16514 doesn't have this brain damage. Read !SYSV386_COMPAT to
16515 figure out what the hardware really does. */
16516 if (STACK_TOP_P (operands[0]))
16517 p = "{p\t%0, %2|rp\t%2, %0}";
16518 else
16519 p = "{rp\t%2, %0|p\t%0, %2}";
16520 #else
16521 if (STACK_TOP_P (operands[0]))
16522 /* As above for fmul/fadd, we can't store to st(0). */
16523 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16524 else
16525 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16526 #endif
16527 break;
16530 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16532 #if SYSV386_COMPAT
16533 if (STACK_TOP_P (operands[0]))
16534 p = "{rp\t%0, %1|p\t%1, %0}";
16535 else
16536 p = "{p\t%1, %0|rp\t%0, %1}";
16537 #else
16538 if (STACK_TOP_P (operands[0]))
16539 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16540 else
16541 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16542 #endif
16543 break;
16546 if (STACK_TOP_P (operands[0]))
16548 if (STACK_TOP_P (operands[1]))
16549 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16550 else
16551 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16552 break;
16554 else if (STACK_TOP_P (operands[1]))
16556 #if SYSV386_COMPAT
16557 p = "{\t%1, %0|r\t%0, %1}";
16558 #else
16559 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16560 #endif
16562 else
16564 #if SYSV386_COMPAT
16565 p = "{r\t%2, %0|\t%0, %2}";
16566 #else
16567 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16568 #endif
16570 break;
16572 default:
16573 gcc_unreachable ();
16576 strcat (buf, p);
16577 return buf;
16580 /* Check if a 256bit AVX register is referenced inside of EXP. */
16582 static bool
16583 ix86_check_avx256_register (const_rtx exp)
16585 if (GET_CODE (exp) == SUBREG)
16586 exp = SUBREG_REG (exp);
16588 return (REG_P (exp)
16589 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
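/* The AVX_U128 entity handled below drives the mode-switching
   (vzeroupper insertion) pass: DIRTY means the upper 128 bits of some
   256bit register may be non-zero, CLEAN means they are known zero,
   ANY means we do not care.  Emitting vzeroupper before transitions to
   legacy SSE code avoids the AVX/SSE transition penalties that some
   processors impose while the upper halves are dirty.  */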
16592 /* Return needed mode for entity in optimize_mode_switching pass. */
16594 static int
16595 ix86_avx_u128_mode_needed (rtx_insn *insn)
16597 if (CALL_P (insn))
16599 rtx link;
16601 /* Needed mode is set to AVX_U128_CLEAN if there are
16602 no 256bit modes used in function arguments. */
16603 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16604 link;
16605 link = XEXP (link, 1))
16607 if (GET_CODE (XEXP (link, 0)) == USE)
16609 rtx arg = XEXP (XEXP (link, 0), 0);
16611 if (ix86_check_avx256_register (arg))
16612 return AVX_U128_DIRTY;
16616 return AVX_U128_CLEAN;
16619 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16620 changes state only when a 256bit register is written to, but we need
16621 to prevent the compiler from moving the optimal insertion point above
16622 an eventual read from a 256bit register. */
16623 subrtx_iterator::array_type array;
16624 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16625 if (ix86_check_avx256_register (*iter))
16626 return AVX_U128_DIRTY;
16628 return AVX_U128_ANY;
16631 /* Return mode that i387 must be switched into
16632 prior to the execution of insn. */
16634 static int
16635 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16637 enum attr_i387_cw mode;
16639 /* The mode UNINITIALIZED is used to store the control word after a
16640 function call or ASM pattern. The mode ANY specifies that the function
16641 has no requirements on the control word and makes no changes in the
16642 bits we are interested in. */
16644 if (CALL_P (insn)
16645 || (NONJUMP_INSN_P (insn)
16646 && (asm_noperands (PATTERN (insn)) >= 0
16647 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16648 return I387_CW_UNINITIALIZED;
16650 if (recog_memoized (insn) < 0)
16651 return I387_CW_ANY;
16653 mode = get_attr_i387_cw (insn);
16655 switch (entity)
16657 case I387_TRUNC:
16658 if (mode == I387_CW_TRUNC)
16659 return mode;
16660 break;
16662 case I387_FLOOR:
16663 if (mode == I387_CW_FLOOR)
16664 return mode;
16665 break;
16667 case I387_CEIL:
16668 if (mode == I387_CW_CEIL)
16669 return mode;
16670 break;
16672 case I387_MASK_PM:
16673 if (mode == I387_CW_MASK_PM)
16674 return mode;
16675 break;
16677 default:
16678 gcc_unreachable ();
16681 return I387_CW_ANY;
16684 /* Return mode that entity must be switched into
16685 prior to the execution of insn. */
16687 static int
16688 ix86_mode_needed (int entity, rtx_insn *insn)
16690 switch (entity)
16692 case AVX_U128:
16693 return ix86_avx_u128_mode_needed (insn);
16694 case I387_TRUNC:
16695 case I387_FLOOR:
16696 case I387_CEIL:
16697 case I387_MASK_PM:
16698 return ix86_i387_mode_needed (entity, insn);
16699 default:
16700 gcc_unreachable ();
16702 return 0;
16705 /* Check if a 256bit AVX register is referenced in stores. */
16707 static void
16708 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16710 if (ix86_check_avx256_register (dest))
16712 bool *used = (bool *) data;
16713 *used = true;
16717 /* Calculate mode of upper 128bit AVX registers after the insn. */
16719 static int
16720 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16722 rtx pat = PATTERN (insn);
16724 if (vzeroupper_operation (pat, VOIDmode)
16725 || vzeroall_operation (pat, VOIDmode))
16726 return AVX_U128_CLEAN;
16728 /* We know that the state is clean after a CALL insn if no 256bit
16729 register is used for the function return value. */
16730 if (CALL_P (insn))
16732 bool avx_reg256_found = false;
16733 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16735 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16738 /* Otherwise, return current mode. Remember that if insn
16739 references AVX 256bit registers, the mode was already changed
16740 to DIRTY from MODE_NEEDED. */
16741 return mode;
16744 /* Return the mode that an insn results in. */
16746 static int
16747 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16749 switch (entity)
16751 case AVX_U128:
16752 return ix86_avx_u128_mode_after (mode, insn);
16753 case I387_TRUNC:
16754 case I387_FLOOR:
16755 case I387_CEIL:
16756 case I387_MASK_PM:
16757 return mode;
16758 default:
16759 gcc_unreachable ();
16763 static int
16764 ix86_avx_u128_mode_entry (void)
16766 tree arg;
16768 /* Entry mode is set to AVX_U128_DIRTY if there are
16769 256bit modes used in function arguments. */
16770 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16771 arg = TREE_CHAIN (arg))
16773 rtx incoming = DECL_INCOMING_RTL (arg);
16775 if (incoming && ix86_check_avx256_register (incoming))
16776 return AVX_U128_DIRTY;
16779 return AVX_U128_CLEAN;
16782 /* Return a mode that ENTITY is assumed to be
16783 switched to at function entry. */
16785 static int
16786 ix86_mode_entry (int entity)
16788 switch (entity)
16790 case AVX_U128:
16791 return ix86_avx_u128_mode_entry ();
16792 case I387_TRUNC:
16793 case I387_FLOOR:
16794 case I387_CEIL:
16795 case I387_MASK_PM:
16796 return I387_CW_ANY;
16797 default:
16798 gcc_unreachable ();
16802 static int
16803 ix86_avx_u128_mode_exit (void)
16805 rtx reg = crtl->return_rtx;
16807 /* Exit mode is set to AVX_U128_DIRTY if there are
16808 256bit modes used in the function return register. */
16809 if (reg && ix86_check_avx256_register (reg))
16810 return AVX_U128_DIRTY;
16812 return AVX_U128_CLEAN;
16815 /* Return a mode that ENTITY is assumed to be
16816 switched to at function exit. */
16818 static int
16819 ix86_mode_exit (int entity)
16821 switch (entity)
16823 case AVX_U128:
16824 return ix86_avx_u128_mode_exit ();
16825 case I387_TRUNC:
16826 case I387_FLOOR:
16827 case I387_CEIL:
16828 case I387_MASK_PM:
16829 return I387_CW_ANY;
16830 default:
16831 gcc_unreachable ();
16835 static int
16836 ix86_mode_priority (int, int n)
16838 return n;
16841 /* Output code to initialize control word copies used by trunc?f?i and
16842 rounding patterns. CURRENT_MODE is set to current control word,
16843 while NEW_MODE is set to new control word. */
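/* In the x87 control word, bits 10-11 are the rounding control
   (00 = to nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5 is
   the precision exception mask; hence the 0x0c00, 0x0400, 0x0800 and
   0x0020 constants used below.  */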
16845 static void
16846 emit_i387_cw_initialization (int mode)
16848 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16849 rtx new_mode;
16851 enum ix86_stack_slot slot;
16853 rtx reg = gen_reg_rtx (HImode);
16855 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16856 emit_move_insn (reg, copy_rtx (stored_mode));
16858 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16859 || optimize_insn_for_size_p ())
16861 switch (mode)
16863 case I387_CW_TRUNC:
16864 /* round toward zero (truncate) */
16865 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16866 slot = SLOT_CW_TRUNC;
16867 break;
16869 case I387_CW_FLOOR:
16870 /* round down toward -oo */
16871 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16872 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16873 slot = SLOT_CW_FLOOR;
16874 break;
16876 case I387_CW_CEIL:
16877 /* round up toward +oo */
16878 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16879 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16880 slot = SLOT_CW_CEIL;
16881 break;
16883 case I387_CW_MASK_PM:
16884 /* mask precision exception for nearbyint() */
16885 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16886 slot = SLOT_CW_MASK_PM;
16887 break;
16889 default:
16890 gcc_unreachable ();
16893 else
16895 switch (mode)
16897 case I387_CW_TRUNC:
16898 /* round toward zero (truncate) */
16899 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16900 slot = SLOT_CW_TRUNC;
16901 break;
16903 case I387_CW_FLOOR:
16904 /* round down toward -oo */
16905 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16906 slot = SLOT_CW_FLOOR;
16907 break;
16909 case I387_CW_CEIL:
16910 /* round up toward +oo */
16911 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16912 slot = SLOT_CW_CEIL;
16913 break;
16915 case I387_CW_MASK_PM:
16916 /* mask precision exception for nearbyint() */
16917 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16918 slot = SLOT_CW_MASK_PM;
16919 break;
16921 default:
16922 gcc_unreachable ();
16926 gcc_assert (slot < MAX_386_STACK_LOCALS);
16928 new_mode = assign_386_stack_local (HImode, slot);
16929 emit_move_insn (new_mode, reg);
16932 /* Emit vzeroupper. */
16934 void
16935 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16937 int i;
16939 /* Cancel automatic vzeroupper insertion if there are
16940 live call-saved SSE registers at the insertion point. */
16942 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16943 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16944 return;
16946 if (TARGET_64BIT)
16947 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16948 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16949 return;
16951 emit_insn (gen_avx_vzeroupper ());
16956 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16957 is the set of hard registers live at the point where the insn(s)
16958 are to be inserted. */
16960 static void
16961 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16962 HARD_REG_SET regs_live)
16964 switch (entity)
16966 case AVX_U128:
16967 if (mode == AVX_U128_CLEAN)
16968 ix86_avx_emit_vzeroupper (regs_live);
16969 break;
16970 case I387_TRUNC:
16971 case I387_FLOOR:
16972 case I387_CEIL:
16973 case I387_MASK_PM:
16974 if (mode != I387_CW_ANY
16975 && mode != I387_CW_UNINITIALIZED)
16976 emit_i387_cw_initialization (mode);
16977 break;
16978 default:
16979 gcc_unreachable ();
16983 /* Output code for INSN to convert a float to a signed int. OPERANDS
16984 are the insn operands. The output may be [HSD]Imode and the input
16985 operand may be [SDX]Fmode. */
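/* Without fisttp, the sequence emitted below is roughly: load the
   rounding-specific control word (operand 3), perform the fist/fistp,
   then restore the original control word (operand 2).  */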
16987 const char *
16988 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16990 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16991 int dimode_p = GET_MODE (operands[0]) == DImode;
16992 int round_mode = get_attr_i387_cw (insn);
16994 /* Jump through a hoop or two for DImode, since the hardware has no
16995 non-popping instruction. We used to do this a different way, but
16996 that was somewhat fragile and broke with post-reload splitters. */
16997 if ((dimode_p || fisttp) && !stack_top_dies)
16998 output_asm_insn ("fld\t%y1", operands);
17000 gcc_assert (STACK_TOP_P (operands[1]));
17001 gcc_assert (MEM_P (operands[0]));
17002 gcc_assert (GET_MODE (operands[1]) != TFmode);
17004 if (fisttp)
17005 output_asm_insn ("fisttp%Z0\t%0", operands);
17006 else
17008 if (round_mode != I387_CW_ANY)
17009 output_asm_insn ("fldcw\t%3", operands);
17010 if (stack_top_dies || dimode_p)
17011 output_asm_insn ("fistp%Z0\t%0", operands);
17012 else
17013 output_asm_insn ("fist%Z0\t%0", operands);
17014 if (round_mode != I387_CW_ANY)
17015 output_asm_insn ("fldcw\t%2", operands);
17018 return "";
17021 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17022 have the values zero or one, indicates the ffreep insn's operand
17023 from the OPERANDS array. */
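/* When the assembler does not know "ffreep", the raw encoding is
   emitted instead; ffreep %st(i) is apparently encoded as the two
   bytes 0xdf 0xc0+i, which is what the .short directive below produces
   on a little-endian target.  */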
17025 static const char *
17026 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17028 if (TARGET_USE_FFREEP)
17029 #ifdef HAVE_AS_IX86_FFREEP
17030 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17031 #else
17033 static char retval[32];
17034 int regno = REGNO (operands[opno]);
17036 gcc_assert (STACK_REGNO_P (regno));
17038 regno -= FIRST_STACK_REG;
17040 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17041 return retval;
17043 #endif
17045 return opno ? "fstp\t%y1" : "fstp\t%y0";
17049 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17050 should be used. UNORDERED_P is true when fucom should be used. */
17052 const char *
17053 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17055 int stack_top_dies;
17056 rtx cmp_op0, cmp_op1;
17057 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17059 if (eflags_p)
17061 cmp_op0 = operands[0];
17062 cmp_op1 = operands[1];
17064 else
17066 cmp_op0 = operands[1];
17067 cmp_op1 = operands[2];
17070 if (is_sse)
17072 if (GET_MODE (operands[0]) == SFmode)
17073 if (unordered_p)
17074 return "%vucomiss\t{%1, %0|%0, %1}";
17075 else
17076 return "%vcomiss\t{%1, %0|%0, %1}";
17077 else
17078 if (unordered_p)
17079 return "%vucomisd\t{%1, %0|%0, %1}";
17080 else
17081 return "%vcomisd\t{%1, %0|%0, %1}";
17084 gcc_assert (STACK_TOP_P (cmp_op0));
17086 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17088 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17090 if (stack_top_dies)
17092 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17093 return output_387_ffreep (operands, 1);
17095 else
17096 return "ftst\n\tfnstsw\t%0";
17099 if (STACK_REG_P (cmp_op1)
17100 && stack_top_dies
17101 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17102 && REGNO (cmp_op1) != FIRST_STACK_REG)
17104 /* If both the top of the 387 stack and the other operand (also a
17105 stack register) die, then this must be a `fcompp' float
17106 compare. */
17108 if (eflags_p)
17110 /* There is no double popping fcomi variant. Fortunately,
17111 eflags is immune from the fstp's cc clobbering. */
17112 if (unordered_p)
17113 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17114 else
17115 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17116 return output_387_ffreep (operands, 0);
17118 else
17120 if (unordered_p)
17121 return "fucompp\n\tfnstsw\t%0";
17122 else
17123 return "fcompp\n\tfnstsw\t%0";
17126 else
17128 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
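/* For example, eflags_p = 1, unordered_p = 1 and a dying stack top
   give mask 0b1011 = 11, i.e. "fucomip".  */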
17130 static const char * const alt[16] =
17132 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17133 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17134 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17135 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17137 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17138 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17139 NULL,
17140 NULL,
17142 "fcomi\t{%y1, %0|%0, %y1}",
17143 "fcomip\t{%y1, %0|%0, %y1}",
17144 "fucomi\t{%y1, %0|%0, %y1}",
17145 "fucomip\t{%y1, %0|%0, %y1}",
17147 NULL,
17148 NULL,
17149 NULL,
17150 NULL
17153 int mask;
17154 const char *ret;
17156 mask = eflags_p << 3;
17157 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17158 mask |= unordered_p << 1;
17159 mask |= stack_top_dies;
17161 gcc_assert (mask < 16);
17162 ret = alt[mask];
17163 gcc_assert (ret);
17165 return ret;
17169 void
17170 ix86_output_addr_vec_elt (FILE *file, int value)
17172 const char *directive = ASM_LONG;
17174 #ifdef ASM_QUAD
17175 if (TARGET_LP64)
17176 directive = ASM_QUAD;
17177 #else
17178 gcc_assert (!TARGET_64BIT);
17179 #endif
17181 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17184 void
17185 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17187 const char *directive = ASM_LONG;
17189 #ifdef ASM_QUAD
17190 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17191 directive = ASM_QUAD;
17192 #else
17193 gcc_assert (!TARGET_64BIT);
17194 #endif
17195 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17196 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17197 fprintf (file, "%s%s%d-%s%d\n",
17198 directive, LPREFIX, value, LPREFIX, rel);
17199 else if (HAVE_AS_GOTOFF_IN_DATA)
17200 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17201 #if TARGET_MACHO
17202 else if (TARGET_MACHO)
17204 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17205 machopic_output_function_base_name (file);
17206 putc ('\n', file);
17208 #endif
17209 else
17210 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17211 GOT_SYMBOL_NAME, LPREFIX, value);
17214 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17215 for the target. */
17217 void
17218 ix86_expand_clear (rtx dest)
17220 rtx tmp;
17222 /* We play register width games, which are only valid after reload. */
17223 gcc_assert (reload_completed);
17225 /* Avoid HImode and its attendant prefix byte. */
17226 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17227 dest = gen_rtx_REG (SImode, REGNO (dest));
17228 tmp = gen_rtx_SET (dest, const0_rtx);
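/* The xor form clobbers the flags, so it must be wrapped in a PARALLEL
   with a (clobber (reg:CC FLAGS_REG)); a plain "mov $0, reg" needs no
   such clobber.  */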
17230 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17232 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17233 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17236 emit_insn (tmp);
17239 /* X is an unchanging MEM. If it is a constant pool reference, return
17240 the constant pool rtx, else NULL. */
17243 maybe_get_pool_constant (rtx x)
17245 x = ix86_delegitimize_address (XEXP (x, 0));
17247 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17248 return get_pool_constant (x);
17250 return NULL_RTX;
17253 void
17254 ix86_expand_move (machine_mode mode, rtx operands[])
17256 rtx op0, op1;
17257 enum tls_model model;
17259 op0 = operands[0];
17260 op1 = operands[1];
17262 if (GET_CODE (op1) == SYMBOL_REF)
17264 rtx tmp;
17266 model = SYMBOL_REF_TLS_MODEL (op1);
17267 if (model)
17269 op1 = legitimize_tls_address (op1, model, true);
17270 op1 = force_operand (op1, op0);
17271 if (op1 == op0)
17272 return;
17273 op1 = convert_to_mode (mode, op1, 1);
17275 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17276 op1 = tmp;
17278 else if (GET_CODE (op1) == CONST
17279 && GET_CODE (XEXP (op1, 0)) == PLUS
17280 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17282 rtx addend = XEXP (XEXP (op1, 0), 1);
17283 rtx symbol = XEXP (XEXP (op1, 0), 0);
17284 rtx tmp;
17286 model = SYMBOL_REF_TLS_MODEL (symbol);
17287 if (model)
17288 tmp = legitimize_tls_address (symbol, model, true);
17289 else
17290 tmp = legitimize_pe_coff_symbol (symbol, true);
17292 if (tmp)
17294 tmp = force_operand (tmp, NULL);
17295 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17296 op0, 1, OPTAB_DIRECT);
17297 if (tmp == op0)
17298 return;
17299 op1 = convert_to_mode (mode, tmp, 1);
17303 if ((flag_pic || MACHOPIC_INDIRECT)
17304 && symbolic_operand (op1, mode))
17306 if (TARGET_MACHO && !TARGET_64BIT)
17308 #if TARGET_MACHO
17309 /* dynamic-no-pic */
17310 if (MACHOPIC_INDIRECT)
17312 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17313 ? op0 : gen_reg_rtx (Pmode);
17314 op1 = machopic_indirect_data_reference (op1, temp);
17315 if (MACHOPIC_PURE)
17316 op1 = machopic_legitimize_pic_address (op1, mode,
17317 temp == op1 ? 0 : temp);
17319 if (op0 != op1 && GET_CODE (op0) != MEM)
17321 rtx insn = gen_rtx_SET (op0, op1);
17322 emit_insn (insn);
17323 return;
17325 if (GET_CODE (op0) == MEM)
17326 op1 = force_reg (Pmode, op1);
17327 else
17329 rtx temp = op0;
17330 if (GET_CODE (temp) != REG)
17331 temp = gen_reg_rtx (Pmode);
17332 temp = legitimize_pic_address (op1, temp);
17333 if (temp == op0)
17334 return;
17335 op1 = temp;
17337 /* dynamic-no-pic */
17338 #endif
17340 else
17342 if (MEM_P (op0))
17343 op1 = force_reg (mode, op1);
17344 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17346 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17347 op1 = legitimize_pic_address (op1, reg);
17348 if (op0 == op1)
17349 return;
17350 op1 = convert_to_mode (mode, op1, 1);
17354 else
17356 if (MEM_P (op0)
17357 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17358 || !push_operand (op0, mode))
17359 && MEM_P (op1))
17360 op1 = force_reg (mode, op1);
17362 if (push_operand (op0, mode)
17363 && ! general_no_elim_operand (op1, mode))
17364 op1 = copy_to_mode_reg (mode, op1);
17366 /* Force large constants in 64bit compilation into register
17367 to get them CSEed. */
17368 if (can_create_pseudo_p ()
17369 && (mode == DImode) && TARGET_64BIT
17370 && immediate_operand (op1, mode)
17371 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17372 && !register_operand (op0, mode)
17373 && optimize)
17374 op1 = copy_to_mode_reg (mode, op1);
17376 if (can_create_pseudo_p ()
17377 && CONST_DOUBLE_P (op1))
17379 /* If we are loading a floating point constant to a register,
17380 force the value to memory now, since we'll get better code
17381 out of the back end. */
17383 op1 = validize_mem (force_const_mem (mode, op1));
17384 if (!register_operand (op0, mode))
17386 rtx temp = gen_reg_rtx (mode);
17387 emit_insn (gen_rtx_SET (temp, op1));
17388 emit_move_insn (op0, temp);
17389 return;
17394 emit_insn (gen_rtx_SET (op0, op1));
17397 void
17398 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17400 rtx op0 = operands[0], op1 = operands[1];
17401 unsigned int align = GET_MODE_ALIGNMENT (mode);
17403 if (push_operand (op0, VOIDmode))
17404 op0 = emit_move_resolve_push (mode, op0);
17406 /* Force constants other than zero into memory. We do not know how
17407 the instructions used to build constants modify the upper 64 bits
17408 of the register; once we have that information we may be able
17409 to handle some of them more efficiently. */
17410 if (can_create_pseudo_p ()
17411 && register_operand (op0, mode)
17412 && (CONSTANT_P (op1)
17413 || (GET_CODE (op1) == SUBREG
17414 && CONSTANT_P (SUBREG_REG (op1))))
17415 && !standard_sse_constant_p (op1))
17416 op1 = validize_mem (force_const_mem (mode, op1));
17418 /* We need to check memory alignment for SSE mode since attribute
17419 can make operands unaligned. */
17420 if (can_create_pseudo_p ()
17421 && SSE_REG_MODE_P (mode)
17422 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17423 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17425 rtx tmp[2];
17427 /* ix86_expand_vector_move_misalign() does not like constants ... */
17428 if (CONSTANT_P (op1)
17429 || (GET_CODE (op1) == SUBREG
17430 && CONSTANT_P (SUBREG_REG (op1))))
17431 op1 = validize_mem (force_const_mem (mode, op1));
17433 /* ... nor both arguments in memory. */
17434 if (!register_operand (op0, mode)
17435 && !register_operand (op1, mode))
17436 op1 = force_reg (mode, op1);
17438 tmp[0] = op0; tmp[1] = op1;
17439 ix86_expand_vector_move_misalign (mode, tmp);
17440 return;
17443 /* Make operand1 a register if it isn't already. */
17444 if (can_create_pseudo_p ()
17445 && !register_operand (op0, mode)
17446 && !register_operand (op1, mode))
17448 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17449 return;
17452 emit_insn (gen_rtx_SET (op0, op1));
17455 /* Split 32-byte AVX unaligned load and store if needed. */
17457 static void
17458 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17460 rtx m;
17461 rtx (*extract) (rtx, rtx, rtx);
17462 rtx (*load_unaligned) (rtx, rtx);
17463 rtx (*store_unaligned) (rtx, rtx);
17464 machine_mode mode;
17466 switch (GET_MODE (op0))
17468 default:
17469 gcc_unreachable ();
17470 case V32QImode:
17471 extract = gen_avx_vextractf128v32qi;
17472 load_unaligned = gen_avx_loaddquv32qi;
17473 store_unaligned = gen_avx_storedquv32qi;
17474 mode = V16QImode;
17475 break;
17476 case V8SFmode:
17477 extract = gen_avx_vextractf128v8sf;
17478 load_unaligned = gen_avx_loadups256;
17479 store_unaligned = gen_avx_storeups256;
17480 mode = V4SFmode;
17481 break;
17482 case V4DFmode:
17483 extract = gen_avx_vextractf128v4df;
17484 load_unaligned = gen_avx_loadupd256;
17485 store_unaligned = gen_avx_storeupd256;
17486 mode = V2DFmode;
17487 break;
17490 if (MEM_P (op1))
17492 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17493 && optimize_insn_for_speed_p ())
17495 rtx r = gen_reg_rtx (mode);
17496 m = adjust_address (op1, mode, 0);
17497 emit_move_insn (r, m);
17498 m = adjust_address (op1, mode, 16);
17499 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17500 emit_move_insn (op0, r);
17502 /* Normal *mov<mode>_internal pattern will handle
17503 unaligned loads just fine if misaligned_operand
17504 is true, and without the UNSPEC it can be combined
17505 with arithmetic instructions. */
17506 else if (misaligned_operand (op1, GET_MODE (op1)))
17507 emit_insn (gen_rtx_SET (op0, op1));
17508 else
17509 emit_insn (load_unaligned (op0, op1));
17511 else if (MEM_P (op0))
17513 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17514 && optimize_insn_for_speed_p ())
17516 m = adjust_address (op0, mode, 0);
17517 emit_insn (extract (m, op1, const0_rtx));
17518 m = adjust_address (op0, mode, 16);
17519 emit_insn (extract (m, op1, const1_rtx));
17521 else
17522 emit_insn (store_unaligned (op0, op1));
17524 else
17525 gcc_unreachable ();
17528 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17529 straight to ix86_expand_vector_move. */
17530 /* Code generation for scalar reg-reg moves of single and double precision data:
17531 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17532 movaps reg, reg
17533 else
17534 movss reg, reg
17535 if (x86_sse_partial_reg_dependency == true)
17536 movapd reg, reg
17537 else
17538 movsd reg, reg
17540 Code generation for scalar loads of double precision data:
17541 if (x86_sse_split_regs == true)
17542 movlpd mem, reg (gas syntax)
17543 else
17544 movsd mem, reg
17546 Code generation for unaligned packed loads of single precision data
17547 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17548 if (x86_sse_unaligned_move_optimal)
17549 movups mem, reg
17551 if (x86_sse_partial_reg_dependency == true)
17553 xorps reg, reg
17554 movlps mem, reg
17555 movhps mem+8, reg
17557 else
17559 movlps mem, reg
17560 movhps mem+8, reg
17563 Code generation for unaligned packed loads of double precision data
17564 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17565 if (x86_sse_unaligned_move_optimal)
17566 movupd mem, reg
17568 if (x86_sse_split_regs == true)
17570 movlpd mem, reg
17571 movhpd mem+8, reg
17573 else
17575 movsd mem, reg
17576 movhpd mem+8, reg
17580 void
17581 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17583 rtx op0, op1, orig_op0 = NULL_RTX, m;
17584 rtx (*load_unaligned) (rtx, rtx);
17585 rtx (*store_unaligned) (rtx, rtx);
17587 op0 = operands[0];
17588 op1 = operands[1];
17590 if (GET_MODE_SIZE (mode) == 64)
17592 switch (GET_MODE_CLASS (mode))
17594 case MODE_VECTOR_INT:
17595 case MODE_INT:
17596 if (GET_MODE (op0) != V16SImode)
17598 if (!MEM_P (op0))
17600 orig_op0 = op0;
17601 op0 = gen_reg_rtx (V16SImode);
17603 else
17604 op0 = gen_lowpart (V16SImode, op0);
17606 op1 = gen_lowpart (V16SImode, op1);
17607 /* FALLTHRU */
17609 case MODE_VECTOR_FLOAT:
17610 switch (GET_MODE (op0))
17612 default:
17613 gcc_unreachable ();
17614 case V16SImode:
17615 load_unaligned = gen_avx512f_loaddquv16si;
17616 store_unaligned = gen_avx512f_storedquv16si;
17617 break;
17618 case V16SFmode:
17619 load_unaligned = gen_avx512f_loadups512;
17620 store_unaligned = gen_avx512f_storeups512;
17621 break;
17622 case V8DFmode:
17623 load_unaligned = gen_avx512f_loadupd512;
17624 store_unaligned = gen_avx512f_storeupd512;
17625 break;
17628 if (MEM_P (op1))
17629 emit_insn (load_unaligned (op0, op1));
17630 else if (MEM_P (op0))
17631 emit_insn (store_unaligned (op0, op1));
17632 else
17633 gcc_unreachable ();
17634 if (orig_op0)
17635 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17636 break;
17638 default:
17639 gcc_unreachable ();
17642 return;
17645 if (TARGET_AVX
17646 && GET_MODE_SIZE (mode) == 32)
17648 switch (GET_MODE_CLASS (mode))
17650 case MODE_VECTOR_INT:
17651 case MODE_INT:
17652 if (GET_MODE (op0) != V32QImode)
17654 if (!MEM_P (op0))
17656 orig_op0 = op0;
17657 op0 = gen_reg_rtx (V32QImode);
17659 else
17660 op0 = gen_lowpart (V32QImode, op0);
17662 op1 = gen_lowpart (V32QImode, op1);
17663 /* FALLTHRU */
17665 case MODE_VECTOR_FLOAT:
17666 ix86_avx256_split_vector_move_misalign (op0, op1);
17667 if (orig_op0)
17668 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17669 break;
17671 default:
17672 gcc_unreachable ();
17675 return;
17678 if (MEM_P (op1))
17680 /* Normal *mov<mode>_internal pattern will handle
17681 unaligned loads just fine if misaligned_operand
17682 is true, and without the UNSPEC it can be combined
17683 with arithmetic instructions. */
17684 if (TARGET_AVX
17685 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17686 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17687 && misaligned_operand (op1, GET_MODE (op1)))
17688 emit_insn (gen_rtx_SET (op0, op1));
17689 /* ??? If we have typed data, then it would appear that using
17690 movdqu is the only way to get unaligned data loaded with
17691 integer type. */
17692 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17694 if (GET_MODE (op0) != V16QImode)
17696 orig_op0 = op0;
17697 op0 = gen_reg_rtx (V16QImode);
17699 op1 = gen_lowpart (V16QImode, op1);
17700 /* We will eventually emit movups based on insn attributes. */
17701 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17702 if (orig_op0)
17703 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17705 else if (TARGET_SSE2 && mode == V2DFmode)
17707 rtx zero;
17709 if (TARGET_AVX
17710 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17711 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17712 || optimize_insn_for_size_p ())
17714 /* We will eventually emit movups based on insn attributes. */
17715 emit_insn (gen_sse2_loadupd (op0, op1));
17716 return;
17719 /* When SSE registers are split into halves, we can avoid
17720 writing to the top half twice. */
17721 if (TARGET_SSE_SPLIT_REGS)
17723 emit_clobber (op0);
17724 zero = op0;
17726 else
17728 /* ??? Not sure about the best option for the Intel chips.
17729 The following would seem to satisfy; the register is
17730 entirely cleared, breaking the dependency chain. We
17731 then store to the upper half, with a dependency depth
17732 of one. A rumor has it that Intel recommends two movsd
17733 followed by an unpacklpd, but this is unconfirmed. And
17734 given that the dependency depth of the unpacklpd would
17735 still be one, I'm not sure why this would be better. */
17736 zero = CONST0_RTX (V2DFmode);
17739 m = adjust_address (op1, DFmode, 0);
17740 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17741 m = adjust_address (op1, DFmode, 8);
17742 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17744 else
17746 rtx t;
17748 if (TARGET_AVX
17749 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17750 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17751 || optimize_insn_for_size_p ())
17753 if (GET_MODE (op0) != V4SFmode)
17755 orig_op0 = op0;
17756 op0 = gen_reg_rtx (V4SFmode);
17758 op1 = gen_lowpart (V4SFmode, op1);
17759 emit_insn (gen_sse_loadups (op0, op1));
17760 if (orig_op0)
17761 emit_move_insn (orig_op0,
17762 gen_lowpart (GET_MODE (orig_op0), op0));
17763 return;
17766 if (mode != V4SFmode)
17767 t = gen_reg_rtx (V4SFmode);
17768 else
17769 t = op0;
17771 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17772 emit_move_insn (t, CONST0_RTX (V4SFmode));
17773 else
17774 emit_clobber (t);
17776 m = adjust_address (op1, V2SFmode, 0);
17777 emit_insn (gen_sse_loadlps (t, t, m));
17778 m = adjust_address (op1, V2SFmode, 8);
17779 emit_insn (gen_sse_loadhps (t, t, m));
17780 if (mode != V4SFmode)
17781 emit_move_insn (op0, gen_lowpart (mode, t));
17784 else if (MEM_P (op0))
17786 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17788 op0 = gen_lowpart (V16QImode, op0);
17789 op1 = gen_lowpart (V16QImode, op1);
17790 /* We will eventually emit movups based on insn attributes. */
17791 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17793 else if (TARGET_SSE2 && mode == V2DFmode)
17795 if (TARGET_AVX
17796 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17797 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17798 || optimize_insn_for_size_p ())
17799 /* We will eventually emit movups based on insn attributes. */
17800 emit_insn (gen_sse2_storeupd (op0, op1));
17801 else
17803 m = adjust_address (op0, DFmode, 0);
17804 emit_insn (gen_sse2_storelpd (m, op1));
17805 m = adjust_address (op0, DFmode, 8);
17806 emit_insn (gen_sse2_storehpd (m, op1));
17809 else
17811 if (mode != V4SFmode)
17812 op1 = gen_lowpart (V4SFmode, op1);
17814 if (TARGET_AVX
17815 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17816 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17817 || optimize_insn_for_size_p ())
17819 op0 = gen_lowpart (V4SFmode, op0);
17820 emit_insn (gen_sse_storeups (op0, op1));
17822 else
17824 m = adjust_address (op0, V2SFmode, 0);
17825 emit_insn (gen_sse_storelps (m, op1));
17826 m = adjust_address (op0, V2SFmode, 8);
17827 emit_insn (gen_sse_storehps (m, op1));
17831 else
17832 gcc_unreachable ();
17835 /* Helper function of ix86_fixup_binary_operands to canonicalize
17836 operand order. Returns true if the operands should be swapped. */
17838 static bool
17839 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17840 rtx operands[])
17842 rtx dst = operands[0];
17843 rtx src1 = operands[1];
17844 rtx src2 = operands[2];
17846 /* If the operation is not commutative, we can't do anything. */
17847 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17848 return false;
17850 /* Highest priority is that src1 should match dst. */
17851 if (rtx_equal_p (dst, src1))
17852 return false;
17853 if (rtx_equal_p (dst, src2))
17854 return true;
17856 /* Next highest priority is that immediate constants come second. */
17857 if (immediate_operand (src2, mode))
17858 return false;
17859 if (immediate_operand (src1, mode))
17860 return true;
17862 /* Lowest priority is that memory references should come second. */
17863 if (MEM_P (src2))
17864 return false;
17865 if (MEM_P (src1))
17866 return true;
17868 return false;
17872 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17873 destination to use for the operation. If different from the true
17874 destination in operands[0], a copy operation will be required. */
17877 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17878 rtx operands[])
17880 rtx dst = operands[0];
17881 rtx src1 = operands[1];
17882 rtx src2 = operands[2];
17884 /* Canonicalize operand order. */
17885 if (ix86_swap_binary_operands_p (code, mode, operands))
17887 /* It is invalid to swap operands of different modes. */
17888 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17890 std::swap (src1, src2);
17893 /* Both source operands cannot be in memory. */
17894 if (MEM_P (src1) && MEM_P (src2))
17896 /* Optimization: Only read from memory once. */
17897 if (rtx_equal_p (src1, src2))
17899 src2 = force_reg (mode, src2);
17900 src1 = src2;
17902 else if (rtx_equal_p (dst, src1))
17903 src2 = force_reg (mode, src2);
17904 else
17905 src1 = force_reg (mode, src1);
17908 /* If the destination is memory, and we do not have matching source
17909 operands, do things in registers. */
17910 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17911 dst = gen_reg_rtx (mode);
17913 /* Source 1 cannot be a constant. */
17914 if (CONSTANT_P (src1))
17915 src1 = force_reg (mode, src1);
17917 /* Source 1 cannot be a non-matching memory. */
17918 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17919 src1 = force_reg (mode, src1);
17921 /* Improve address combine. */
17922 if (code == PLUS
17923 && GET_MODE_CLASS (mode) == MODE_INT
17924 && MEM_P (src2))
17925 src2 = force_reg (mode, src2);
17927 operands[1] = src1;
17928 operands[2] = src2;
17929 return dst;
17932 /* Similarly, but assume that the destination has already been
17933 set up properly. */
17935 void
17936 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17937 machine_mode mode, rtx operands[])
17939 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17940 gcc_assert (dst == operands[0]);
17943 /* Attempt to expand a binary operator. Make the expansion closer to the
17944 actual machine, than just general_operand, which will allow 3 separate
17945 memory references (one output, two input) in a single insn. */
17947 void
17948 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
17949 rtx operands[])
17951 rtx src1, src2, dst, op, clob;
17953 dst = ix86_fixup_binary_operands (code, mode, operands);
17954 src1 = operands[1];
17955 src2 = operands[2];
17957 /* Emit the instruction. */
17959 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17961 if (reload_completed
17962 && code == PLUS
17963 && !rtx_equal_p (dst, src1))
17965 /* This is going to be an LEA; avoid splitting it later. */
17966 emit_insn (op);
17968 else
17970 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17971 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17974 /* Fix up the destination if needed. */
17975 if (dst != operands[0])
17976 emit_move_insn (operands[0], dst);
17979 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17980 the given OPERANDS. */
17982 void
17983 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
17984 rtx operands[])
17986 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17987 if (GET_CODE (operands[1]) == SUBREG)
17989 op1 = operands[1];
17990 op2 = operands[2];
17992 else if (GET_CODE (operands[2]) == SUBREG)
17994 op1 = operands[2];
17995 op2 = operands[1];
17997 /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
17998 are float vectors, into a float vector logical insn. In C/C++,
17999 without using intrinsics, there is no other way to express a
18000 vector logical operation on float vectors than to cast them
18001 temporarily to integer vectors. */
18002 if (op1
18003 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18004 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18005 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18006 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18007 && SUBREG_BYTE (op1) == 0
18008 && (GET_CODE (op2) == CONST_VECTOR
18009 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18010 && SUBREG_BYTE (op2) == 0))
18011 && can_create_pseudo_p ())
18013 rtx dst;
18014 switch (GET_MODE (SUBREG_REG (op1)))
18016 case V4SFmode:
18017 case V8SFmode:
18018 case V16SFmode:
18019 case V2DFmode:
18020 case V4DFmode:
18021 case V8DFmode:
18022 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18023 if (GET_CODE (op2) == CONST_VECTOR)
18025 op2 = gen_lowpart (GET_MODE (dst), op2);
18026 op2 = force_reg (GET_MODE (dst), op2);
18028 else
18030 op1 = operands[1];
18031 op2 = SUBREG_REG (operands[2]);
18032 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18033 op2 = force_reg (GET_MODE (dst), op2);
18035 op1 = SUBREG_REG (op1);
18036 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18037 op1 = force_reg (GET_MODE (dst), op1);
18038 emit_insn (gen_rtx_SET (dst,
18039 gen_rtx_fmt_ee (code, GET_MODE (dst),
18040 op1, op2)));
18041 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18042 return;
18043 default:
18044 break;
18047 if (!nonimmediate_operand (operands[1], mode))
18048 operands[1] = force_reg (mode, operands[1]);
18049 if (!nonimmediate_operand (operands[2], mode))
18050 operands[2] = force_reg (mode, operands[2]);
18051 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18052 emit_insn (gen_rtx_SET (operands[0],
18053 gen_rtx_fmt_ee (code, mode, operands[1],
18054 operands[2])));
18057 /* Return TRUE or FALSE depending on whether the binary operator meets the
18058 appropriate constraints. */
18060 bool
18061 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18062 rtx operands[3])
18064 rtx dst = operands[0];
18065 rtx src1 = operands[1];
18066 rtx src2 = operands[2];
18068 /* Both source operands cannot be in memory. */
18069 if (MEM_P (src1) && MEM_P (src2))
18070 return false;
18072 /* Canonicalize operand order for commutative operators. */
18073 if (ix86_swap_binary_operands_p (code, mode, operands))
18074 std::swap (src1, src2);
18076 /* If the destination is memory, we must have a matching source operand. */
18077 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18078 return false;
18080 /* Source 1 cannot be a constant. */
18081 if (CONSTANT_P (src1))
18082 return false;
18084 /* Source 1 cannot be a non-matching memory. */
18085 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18086 /* Support "andhi/andsi/anddi" as a zero-extending move. */
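/* The "L" constraint below matches the 0xff, 0xffff and 0xffffffff
   zero-extension masks.  */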
18087 return (code == AND
18088 && (mode == HImode
18089 || mode == SImode
18090 || (TARGET_64BIT && mode == DImode))
18091 && satisfies_constraint_L (src2));
18093 return true;
18096 /* Attempt to expand a unary operator. Make the expansion closer to the
18097 actual machine, than just general_operand, which will allow 2 separate
18098 memory references (one output, one input) in a single insn. */
18100 void
18101 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18102 rtx operands[])
18104 bool matching_memory = false;
18105 rtx src, dst, op, clob;
18107 dst = operands[0];
18108 src = operands[1];
18110 /* If the destination is memory, and we do not have matching source
18111 operands, do things in registers. */
18112 if (MEM_P (dst))
18114 if (rtx_equal_p (dst, src))
18115 matching_memory = true;
18116 else
18117 dst = gen_reg_rtx (mode);
18120 /* When source operand is memory, destination must match. */
18121 if (MEM_P (src) && !matching_memory)
18122 src = force_reg (mode, src);
18124 /* Emit the instruction. */
18126 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18128 if (code == NOT)
18129 emit_insn (op);
18130 else
18132 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18133 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18136 /* Fix up the destination if needed. */
18137 if (dst != operands[0])
18138 emit_move_insn (operands[0], dst);
18141 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18142 divisor are within the range [0-255]. */
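/* The emitted code is roughly (illustrative sketch only):

	mov	op2, scratch
	or	op3, scratch
	test	$-0x100, scratch	# any bits above bit 7 set?
	je	.Lqimode
	<full-width signed/unsigned divmod>
	jmp	.Lend
   .Lqimode:
	<8-bit unsigned divmod; quotient in AL, remainder in AH>
   .Lend:  */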
18144 void
18145 ix86_split_idivmod (machine_mode mode, rtx operands[],
18146 bool signed_p)
18148 rtx_code_label *end_label, *qimode_label;
18149 rtx insn, div, mod;
18150 rtx scratch, tmp0, tmp1, tmp2;
18151 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18152 rtx (*gen_zero_extend) (rtx, rtx);
18153 rtx (*gen_test_ccno_1) (rtx, rtx);
18155 switch (mode)
18157 case SImode:
18158 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18159 gen_test_ccno_1 = gen_testsi_ccno_1;
18160 gen_zero_extend = gen_zero_extendqisi2;
18161 break;
18162 case DImode:
18163 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18164 gen_test_ccno_1 = gen_testdi_ccno_1;
18165 gen_zero_extend = gen_zero_extendqidi2;
18166 break;
18167 default:
18168 gcc_unreachable ();
18171 end_label = gen_label_rtx ();
18172 qimode_label = gen_label_rtx ();
18174 scratch = gen_reg_rtx (mode);
18176 /* Use 8bit unsigned divmod if dividend and divisor are within
18177 the range [0-255]. */
18178 emit_move_insn (scratch, operands[2]);
18179 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18180 scratch, 1, OPTAB_DIRECT);
18181 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18182 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18183 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18184 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18185 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18186 pc_rtx);
18187 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18188 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18189 JUMP_LABEL (insn) = qimode_label;
18191 /* Generate original signed/unsigned divmod. */
18192 div = gen_divmod4_1 (operands[0], operands[1],
18193 operands[2], operands[3]);
18194 emit_insn (div);
18196 /* Branch to the end. */
18197 emit_jump_insn (gen_jump (end_label));
18198 emit_barrier ();
18200 /* Generate 8bit unsigned divide. */
18201 emit_label (qimode_label);
18202 /* Don't use operands[0] for result of 8bit divide since not all
18203 registers support QImode ZERO_EXTRACT. */
18204 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18205 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18206 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18207 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18209 if (signed_p)
18211 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18212 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18214 else
18216 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18217 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18220 /* Extract remainder from AH. */
18221 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18222 if (REG_P (operands[1]))
18223 insn = emit_move_insn (operands[1], tmp1);
18224 else
18226 /* Need a new scratch register since the old one has result
18227 of 8bit divide. */
18228 scratch = gen_reg_rtx (mode);
18229 emit_move_insn (scratch, tmp1);
18230 insn = emit_move_insn (operands[1], scratch);
18232 set_unique_reg_note (insn, REG_EQUAL, mod);
18234 /* Zero extend quotient from AL. */
18235 tmp1 = gen_lowpart (QImode, tmp0);
18236 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18237 set_unique_reg_note (insn, REG_EQUAL, div);
18239 emit_label (end_label);
18242 #define LEA_MAX_STALL (3)
18243 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
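/* Distances below are accumulated in half-cycles (see increase_distance)
   and only converted to full cycles on return, so this threshold
   corresponds to LEA_MAX_STALL full cycles.  */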
18245 /* Increase given DISTANCE in half-cycles according to
18246 dependencies between PREV and NEXT instructions.
18247 Add 1 half-cycle if there is no dependency and
18248 go to the next cycle if there is some dependency. */
18250 static unsigned int
18251 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18253 df_ref def, use;
18255 if (!prev || !next)
18256 return distance + (distance & 1) + 2;
18258 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18259 return distance + 1;
18261 FOR_EACH_INSN_USE (use, next)
18262 FOR_EACH_INSN_DEF (def, prev)
18263 if (!DF_REF_IS_ARTIFICIAL (def)
18264 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18265 return distance + (distance & 1) + 2;
18267 return distance + 1;
18270 /* Function checks if instruction INSN defines register number
18271 REGNO1 or REGNO2. */
18273 static bool
18274 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18275 rtx_insn *insn)
18277 df_ref def;
18279 FOR_EACH_INSN_DEF (def, insn)
18280 if (DF_REF_REG_DEF_P (def)
18281 && !DF_REF_IS_ARTIFICIAL (def)
18282 && (regno1 == DF_REF_REGNO (def)
18283 || regno2 == DF_REF_REGNO (def)))
18284 return true;
18286 return false;
18289 /* Function checks if instruction INSN uses register number
18290 REGNO as a part of address expression. */
18292 static bool
18293 insn_uses_reg_mem (unsigned int regno, rtx insn)
18295 df_ref use;
18297 FOR_EACH_INSN_USE (use, insn)
18298 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18299 return true;
18301 return false;
18304 /* Search backward for non-agu definition of register number REGNO1
18305 or register number REGNO2 in basic block starting from instruction
18306 START up to head of basic block or instruction INSN.
18308 Function puts true value into *FOUND var if definition was found
18309 and false otherwise.
18311 Distance in half-cycles between START and found instruction or head
18312 of BB is added to DISTANCE and returned. */
18314 static int
18315 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18316 rtx_insn *insn, int distance,
18317 rtx_insn *start, bool *found)
18319 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18320 rtx_insn *prev = start;
18321 rtx_insn *next = NULL;
18323 *found = false;
18325 while (prev
18326 && prev != insn
18327 && distance < LEA_SEARCH_THRESHOLD)
18329 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18331 distance = increase_distance (prev, next, distance);
18332 if (insn_defines_reg (regno1, regno2, prev))
18334 if (recog_memoized (prev) < 0
18335 || get_attr_type (prev) != TYPE_LEA)
18337 *found = true;
18338 return distance;
18342 next = prev;
18344 if (prev == BB_HEAD (bb))
18345 break;
18347 prev = PREV_INSN (prev);
18350 return distance;
18353 /* Search backward for non-agu definition of register number REGNO1
18354 or register number REGNO2 in INSN's basic block until
18355 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18356 2. Reach neighbour BBs boundary, or
18357 3. Reach agu definition.
18358 Returns the distance between the non-agu definition point and INSN.
18359 If no definition point, returns -1. */
18361 static int
18362 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18363 rtx_insn *insn)
18365 basic_block bb = BLOCK_FOR_INSN (insn);
18366 int distance = 0;
18367 bool found = false;
18369 if (insn != BB_HEAD (bb))
18370 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18371 distance, PREV_INSN (insn),
18372 &found);
18374 if (!found && distance < LEA_SEARCH_THRESHOLD)
18376 edge e;
18377 edge_iterator ei;
18378 bool simple_loop = false;
18380 FOR_EACH_EDGE (e, ei, bb->preds)
18381 if (e->src == bb)
18383 simple_loop = true;
18384 break;
18387 if (simple_loop)
18388 distance = distance_non_agu_define_in_bb (regno1, regno2,
18389 insn, distance,
18390 BB_END (bb), &found);
18391 else
18393 int shortest_dist = -1;
18394 bool found_in_bb = false;
18396 FOR_EACH_EDGE (e, ei, bb->preds)
18398 int bb_dist
18399 = distance_non_agu_define_in_bb (regno1, regno2,
18400 insn, distance,
18401 BB_END (e->src),
18402 &found_in_bb);
18403 if (found_in_bb)
18405 if (shortest_dist < 0)
18406 shortest_dist = bb_dist;
18407 else if (bb_dist > 0)
18408 shortest_dist = MIN (bb_dist, shortest_dist);
18410 found = true;
18414 distance = shortest_dist;
18418 /* get_attr_type may modify recog data. We want to make sure
18419 that recog data is valid for instruction INSN, on which
18420 distance_non_agu_define is called. INSN is unchanged here. */
18421 extract_insn_cached (insn);
18423 if (!found)
18424 return -1;
18426 return distance >> 1;
18429 /* Return the distance in half-cycles between INSN and the next
18430 insn that uses register number REGNO in a memory address, added
18431 to DISTANCE. Return -1 if REGNO is set.
18433 Put true value into *FOUND if register usage was found and
18434 false otherwise.
18435 Put true value into *REDEFINED if register redefinition was
18436 found and false otherwise. */
18438 static int
18439 distance_agu_use_in_bb (unsigned int regno,
18440 rtx_insn *insn, int distance, rtx_insn *start,
18441 bool *found, bool *redefined)
18443 basic_block bb = NULL;
18444 rtx_insn *next = start;
18445 rtx_insn *prev = NULL;
18447 *found = false;
18448 *redefined = false;
18450 if (start != NULL_RTX)
18452 bb = BLOCK_FOR_INSN (start);
18453 if (start != BB_HEAD (bb))
18454 /* If insn and start belong to the same bb, set prev to insn,
18455 so the call to increase_distance will increase the distance
18456 between insns by 1. */
18457 prev = insn;
18460 while (next
18461 && next != insn
18462 && distance < LEA_SEARCH_THRESHOLD)
18464 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18466 distance = increase_distance(prev, next, distance);
18467 if (insn_uses_reg_mem (regno, next))
18469 /* Return DISTANCE if OP0 is used in memory
18470 address in NEXT. */
18471 *found = true;
18472 return distance;
18475 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18477 /* Return -1 if OP0 is set in NEXT. */
18478 *redefined = true;
18479 return -1;
18482 prev = next;
18485 if (next == BB_END (bb))
18486 break;
18488 next = NEXT_INSN (next);
18491 return distance;
18494 /* Return the distance between INSN and the next insn that uses
18495 register number REGNO0 in a memory address. Return -1 if no such
18496 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18498 static int
18499 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18501 basic_block bb = BLOCK_FOR_INSN (insn);
18502 int distance = 0;
18503 bool found = false;
18504 bool redefined = false;
18506 if (insn != BB_END (bb))
18507 distance = distance_agu_use_in_bb (regno0, insn, distance,
18508 NEXT_INSN (insn),
18509 &found, &redefined);
18511 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18513 edge e;
18514 edge_iterator ei;
18515 bool simple_loop = false;
18517 FOR_EACH_EDGE (e, ei, bb->succs)
18518 if (e->dest == bb)
18520 simple_loop = true;
18521 break;
18524 if (simple_loop)
18525 distance = distance_agu_use_in_bb (regno0, insn,
18526 distance, BB_HEAD (bb),
18527 &found, &redefined);
18528 else
18530 int shortest_dist = -1;
18531 bool found_in_bb = false;
18532 bool redefined_in_bb = false;
18534 FOR_EACH_EDGE (e, ei, bb->succs)
18536 int bb_dist
18537 = distance_agu_use_in_bb (regno0, insn,
18538 distance, BB_HEAD (e->dest),
18539 &found_in_bb, &redefined_in_bb);
18540 if (found_in_bb)
18542 if (shortest_dist < 0)
18543 shortest_dist = bb_dist;
18544 else if (bb_dist > 0)
18545 shortest_dist = MIN (bb_dist, shortest_dist);
18547 found = true;
18551 distance = shortest_dist;
18555 if (!found || redefined)
18556 return -1;
18558 return distance >> 1;
18561 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18562 there is a dilemma of choosing LEA or ADD.
18563 Negative value: ADD is preferred over LEA
18564 Zero: Neutral
18565 Positive value: LEA is preferred over ADD */
18566 #define IX86_LEA_PRIORITY 0
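/* On the CPUs this tuning targets, "lea (%rdi,%rsi), %rax" computes the
   sum in the AGU, while the split form "mov %rdi, %rax; add %rsi, %rax"
   goes through the ALU; which is better depends on the surrounding
   AGU/ALU dependencies estimated below.  */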
18568 /* Return true if using lea INSN has a performance advantage
18569 over a sequence of instructions. The instruction sequence has
18570 SPLIT_COST cycles higher latency than the lea. */
18572 static bool
18573 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18574 unsigned int regno2, int split_cost, bool has_scale)
18576 int dist_define, dist_use;
18578 /* For Silvermont, if a 2-source or 3-source LEA is used for
18579 non-destructive destination purposes, or for the ability
18580 to use SCALE, the use of LEA is justified. */
18581 if (TARGET_SILVERMONT || TARGET_INTEL)
18583 if (has_scale)
18584 return true;
18585 if (split_cost < 1)
18586 return false;
18587 if (regno0 == regno1 || regno0 == regno2)
18588 return false;
18589 return true;
18592 dist_define = distance_non_agu_define (regno1, regno2, insn);
18593 dist_use = distance_agu_use (regno0, insn);
18595 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18597 /* If there is no non-AGU operand definition, no AGU
18598 operand usage and the split cost is 0, then both the lea
18599 and non-lea variants have the same priority. Currently
18600 we prefer lea for 64-bit code and non-lea for 32-bit
18601 code. */
18602 if (dist_use < 0 && split_cost == 0)
18603 return TARGET_64BIT || IX86_LEA_PRIORITY;
18604 else
18605 return true;
18608 /* With a longer distance to the definition, lea is preferable.
18609 Here we adjust it to take into account the splitting cost and
18610 the lea priority. */
18611 dist_define += split_cost + IX86_LEA_PRIORITY;
18613 /* If there is no use in a memory address then we just check
18614 that the split cost exceeds the AGU stall.  */
18615 if (dist_use < 0)
18616 return dist_define > LEA_MAX_STALL;
18618 /* If this insn has both a backward non-AGU dependence and a forward
18619 AGU dependence, the one with the shorter distance takes effect.  */
18620 return dist_define >= dist_use;
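/* A worked example with hypothetical distances (assuming both stay below
   LEA_MAX_STALL): dist_define == 1, split_cost == 0 and dist_use == 3 give
   an adjusted define distance of 1 + 0 + IX86_LEA_PRIORITY == 1, which is
   less than dist_use, so we return false and the caller splits the LEA.
   With dist_define == 2, split_cost == 1 and dist_use == 2 the adjusted
   distance is 3 >= 2, so we return true and the LEA is kept.  */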
18623 /* Return true if it is legal to clobber flags by INSN and
18624 false otherwise. */
18626 static bool
18627 ix86_ok_to_clobber_flags (rtx_insn *insn)
18629 basic_block bb = BLOCK_FOR_INSN (insn);
18630 df_ref use;
18631 bitmap live;
18633 while (insn)
18635 if (NONDEBUG_INSN_P (insn))
18637 FOR_EACH_INSN_USE (use, insn)
18638 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18639 return false;
18641 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18642 return true;
18645 if (insn == BB_END (bb))
18646 break;
18648 insn = NEXT_INSN (insn);
18651 live = df_get_live_out (bb);
18652 return !REGNO_REG_SET_P (live, FLAGS_REG);
18655 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18656 move and add to avoid AGU stalls. */
18658 bool
18659 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18661 unsigned int regno0, regno1, regno2;
18663 /* Check if we need to optimize. */
18664 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18665 return false;
18667 /* Check it is correct to split here. */
18668 if (!ix86_ok_to_clobber_flags (insn))
18669 return false;
18671 regno0 = true_regnum (operands[0]);
18672 regno1 = true_regnum (operands[1]);
18673 regno2 = true_regnum (operands[2]);
18675 /* We need to split only adds with a non-destructive
18676 destination operand.  */
18677 if (regno0 == regno1 || regno0 == regno2)
18678 return false;
18679 else
18680 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
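/* For instance (hypothetical registers, AT&T syntax), when this returns
   true the insn "lea (%rdi,%rsi), %rax" is split into
   "mov %rdi, %rax; add %rsi, %rax", trading the single LEA for two ALU
   instructions -- hence the split_cost of 1 passed above.  */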
18683 /* Return true if we should emit lea instruction instead of mov
18684 instruction. */
18686 bool
18687 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18689 unsigned int regno0, regno1;
18691 /* Check if we need to optimize. */
18692 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18693 return false;
18695 /* Use lea for reg to reg moves only. */
18696 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18697 return false;
18699 regno0 = true_regnum (operands[0]);
18700 regno1 = true_regnum (operands[1]);
18702 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18705 /* Return true if we need to split lea into a sequence of
18706 instructions to avoid AGU stalls. */
18708 bool
18709 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18711 unsigned int regno0, regno1, regno2;
18712 int split_cost;
18713 struct ix86_address parts;
18714 int ok;
18716 /* Check we need to optimize. */
18717 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18718 return false;
18720 /* The "at least two components" test below might not catch simple
18721 move or zero extension insns if parts.base is non-NULL and parts.disp
18722 is const0_rtx as the only components in the address, e.g. if the
18723 register is %rbp or %r13. As this test is much cheaper and moves or
18724 zero extensions are the common case, do this check first. */
18725 if (REG_P (operands[1])
18726 || (SImode_address_operand (operands[1], VOIDmode)
18727 && REG_P (XEXP (operands[1], 0))))
18728 return false;
18730 /* Check if it is OK to split here. */
18731 if (!ix86_ok_to_clobber_flags (insn))
18732 return false;
18734 ok = ix86_decompose_address (operands[1], &parts);
18735 gcc_assert (ok);
18737 /* There should be at least two components in the address. */
18738 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18739 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18740 return false;
18742 /* We should not split into an add if a non-legitimate PIC
18743 operand is used as the displacement.  */
18744 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18745 return false;
18747 regno0 = true_regnum (operands[0]);
18748 regno1 = INVALID_REGNUM;
18749 regno2 = INVALID_REGNUM;
18751 if (parts.base)
18752 regno1 = true_regnum (parts.base);
18753 if (parts.index)
18754 regno2 = true_regnum (parts.index);
18756 split_cost = 0;
18758 /* Compute how many cycles we will add to the execution time
18759 if we split the LEA into a sequence of instructions.  */
18760 if (parts.base || parts.index)
18762 /* Have to use a mov instruction if the non-destructive
18763 destination form is used.  */
18764 if (regno1 != regno0 && regno2 != regno0)
18765 split_cost += 1;
18767 /* Have to add index to base if both exist. */
18768 if (parts.base && parts.index)
18769 split_cost += 1;
18771 /* Have to use shift and adds if scale is 2 or greater. */
18772 if (parts.scale > 1)
18774 if (regno0 != regno1)
18775 split_cost += 1;
18776 else if (regno2 == regno0)
18777 split_cost += 4;
18778 else
18779 split_cost += parts.scale;
18782 /* Have to use an add instruction with an immediate if
18783 disp is nonzero.  */
18784 if (parts.disp && parts.disp != const0_rtx)
18785 split_cost += 1;
18787 /* Subtract the price of lea. */
18788 split_cost -= 1;
18791 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18792 parts.scale > 1);
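/* A sample split_cost computation (hypothetical operands): for
   "lea 8(%rbx,%rcx,4), %rax" with %rax distinct from %rbx and %rcx the
   cost is 1 (mov for the non-destructive destination) + 1 (add of base
   and index) + 1 (shift for the scale) + 1 (add of the displacement)
   - 1 (the LEA itself) == 3 extra instructions compared to the LEA.  */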
18795 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18796 matches destination. RTX includes clobber of FLAGS_REG. */
18798 static void
18799 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18800 rtx dst, rtx src)
18802 rtx op, clob;
18804 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18805 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
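/* For example, ix86_emit_binop (PLUS, SImode, dst, src) emits roughly
     (parallel [(set (reg:SI dst) (plus:SI (reg:SI dst) (reg:SI src)))
                (clobber (reg:CC flags))])
   i.e. the destructive two-operand form with the flags clobber.  */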
18810 /* Return true if regno1 def is nearest to the insn. */
18812 static bool
18813 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18815 rtx_insn *prev = insn;
18816 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18818 if (insn == start)
18819 return false;
18820 while (prev && prev != start)
18822 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18824 prev = PREV_INSN (prev);
18825 continue;
18827 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18828 return true;
18829 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18830 return false;
18831 prev = PREV_INSN (prev);
18834 /* None of the regs is defined in the bb. */
18835 return false;
18838 /* Split lea instructions into a sequence of instructions
18839 which are executed on ALU to avoid AGU stalls.
18840 It is assumed that it is allowed to clobber flags register
18841 at lea position. */
18843 void
18844 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18846 unsigned int regno0, regno1, regno2;
18847 struct ix86_address parts;
18848 rtx target, tmp;
18849 int ok, adds;
18851 ok = ix86_decompose_address (operands[1], &parts);
18852 gcc_assert (ok);
18854 target = gen_lowpart (mode, operands[0]);
18856 regno0 = true_regnum (target);
18857 regno1 = INVALID_REGNUM;
18858 regno2 = INVALID_REGNUM;
18860 if (parts.base)
18862 parts.base = gen_lowpart (mode, parts.base);
18863 regno1 = true_regnum (parts.base);
18866 if (parts.index)
18868 parts.index = gen_lowpart (mode, parts.index);
18869 regno2 = true_regnum (parts.index);
18872 if (parts.disp)
18873 parts.disp = gen_lowpart (mode, parts.disp);
18875 if (parts.scale > 1)
18877 /* Case r1 = r1 + ... */
18878 if (regno1 == regno0)
18880 /* If we have a case r1 = r1 + C * r2 then we
18881 would have to use multiplication, which is very
18882 expensive.  Assume the cost model is wrong if such
18883 a case reaches here.  */
18884 gcc_assert (regno2 != regno0);
18886 for (adds = parts.scale; adds > 0; adds--)
18887 ix86_emit_binop (PLUS, mode, target, parts.index);
18889 else
18891 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18892 if (regno0 != regno2)
18893 emit_insn (gen_rtx_SET (target, parts.index));
18895 /* Use shift for scaling. */
18896 ix86_emit_binop (ASHIFT, mode, target,
18897 GEN_INT (exact_log2 (parts.scale)));
18899 if (parts.base)
18900 ix86_emit_binop (PLUS, mode, target, parts.base);
18902 if (parts.disp && parts.disp != const0_rtx)
18903 ix86_emit_binop (PLUS, mode, target, parts.disp);
18906 else if (!parts.base && !parts.index)
18908 gcc_assert (parts.disp);
18909 emit_insn (gen_rtx_SET (target, parts.disp));
18911 else
18913 if (!parts.base)
18915 if (regno0 != regno2)
18916 emit_insn (gen_rtx_SET (target, parts.index));
18918 else if (!parts.index)
18920 if (regno0 != regno1)
18921 emit_insn (gen_rtx_SET (target, parts.base));
18923 else
18925 if (regno0 == regno1)
18926 tmp = parts.index;
18927 else if (regno0 == regno2)
18928 tmp = parts.base;
18929 else
18931 rtx tmp1;
18933 /* Find better operand for SET instruction, depending
18934 on which definition is farther from the insn. */
18935 if (find_nearest_reg_def (insn, regno1, regno2))
18936 tmp = parts.index, tmp1 = parts.base;
18937 else
18938 tmp = parts.base, tmp1 = parts.index;
18940 emit_insn (gen_rtx_SET (target, tmp));
18942 if (parts.disp && parts.disp != const0_rtx)
18943 ix86_emit_binop (PLUS, mode, target, parts.disp);
18945 ix86_emit_binop (PLUS, mode, target, tmp1);
18946 return;
18949 ix86_emit_binop (PLUS, mode, target, tmp);
18952 if (parts.disp && parts.disp != const0_rtx)
18953 ix86_emit_binop (PLUS, mode, target, parts.disp);
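/* For instance (hypothetical operands), "lea 0x10(%rbx,%rcx,8), %rax"
   with %rax distinct from %rbx and %rcx is expanded roughly as
     mov %rcx, %rax
     shl $3, %rax
     add %rbx, %rax
     add $0x10, %rax
   using the shift-for-scale path above.  */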
18957 /* Return true if it is ok to optimize an ADD operation to an LEA
18958 operation to avoid flag register consumption.  For most processors,
18959 ADD is faster than LEA.  For processors like BONNELL, if the
18960 destination register of the LEA holds an actual address that will be
18961 used soon, LEA is better; otherwise ADD is better.  */
18963 bool
18964 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18966 unsigned int regno0 = true_regnum (operands[0]);
18967 unsigned int regno1 = true_regnum (operands[1]);
18968 unsigned int regno2 = true_regnum (operands[2]);
18970 /* If a = b + c, (a!=b && a!=c), we must use the LEA form.  */
18971 if (regno0 != regno1 && regno0 != regno2)
18972 return true;
18974 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18975 return false;
18977 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18980 /* Return true if destination reg of SET_BODY is shift count of
18981 USE_BODY. */
18983 static bool
18984 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18986 rtx set_dest;
18987 rtx shift_rtx;
18988 int i;
18990 /* Retrieve destination of SET_BODY. */
18991 switch (GET_CODE (set_body))
18993 case SET:
18994 set_dest = SET_DEST (set_body);
18995 if (!set_dest || !REG_P (set_dest))
18996 return false;
18997 break;
18998 case PARALLEL:
18999 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19000 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19001 use_body))
19002 return true;
19003 default:
19004 return false;
19005 break;
19008 /* Retrieve shift count of USE_BODY. */
19009 switch (GET_CODE (use_body))
19011 case SET:
19012 shift_rtx = XEXP (use_body, 1);
19013 break;
19014 case PARALLEL:
19015 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19016 if (ix86_dep_by_shift_count_body (set_body,
19017 XVECEXP (use_body, 0, i)))
19018 return true;
19019 default:
19020 return false;
19021 break;
19024 if (shift_rtx
19025 && (GET_CODE (shift_rtx) == ASHIFT
19026 || GET_CODE (shift_rtx) == LSHIFTRT
19027 || GET_CODE (shift_rtx) == ASHIFTRT
19028 || GET_CODE (shift_rtx) == ROTATE
19029 || GET_CODE (shift_rtx) == ROTATERT))
19031 rtx shift_count = XEXP (shift_rtx, 1);
19033 /* Return true if shift count is dest of SET_BODY. */
19034 if (REG_P (shift_count))
19036 /* Add this check since it can be invoked before register
19037 allocation by the pre-reload scheduler.  */
19038 if (reload_completed
19039 && true_regnum (set_dest) == true_regnum (shift_count))
19040 return true;
19041 else if (REGNO (set_dest) == REGNO (shift_count))
19042 return true;
19046 return false;
19049 /* Return true if destination reg of SET_INSN is shift count of
19050 USE_INSN. */
19052 bool
19053 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19055 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19056 PATTERN (use_insn));
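/* This catches cases such as (hypothetically) a SET_INSN "movl $3, %ecx"
   feeding a USE_INSN "sall %cl, %eax", where the register written by the
   set is the shift count consumed by the use.  */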
19059 /* Return TRUE or FALSE depending on whether the unary operator meets the
19060 appropriate constraints. */
19062 bool
19063 ix86_unary_operator_ok (enum rtx_code,
19064 machine_mode,
19065 rtx operands[2])
19067 /* If one of operands is memory, source and destination must match. */
19068 if ((MEM_P (operands[0])
19069 || MEM_P (operands[1]))
19070 && ! rtx_equal_p (operands[0], operands[1]))
19071 return false;
19072 return true;
19075 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19076 are ok, keeping in mind the possible movddup alternative. */
19078 bool
19079 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19081 if (MEM_P (operands[0]))
19082 return rtx_equal_p (operands[0], operands[1 + high]);
19083 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19084 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19085 return true;
19088 /* Post-reload splitter for converting an SF or DFmode value in an
19089 SSE register into an unsigned SImode. */
19091 void
19092 ix86_split_convert_uns_si_sse (rtx operands[])
19094 machine_mode vecmode;
19095 rtx value, large, zero_or_two31, input, two31, x;
19097 large = operands[1];
19098 zero_or_two31 = operands[2];
19099 input = operands[3];
19100 two31 = operands[4];
19101 vecmode = GET_MODE (large);
19102 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19104 /* Load up the value into the low element. We must ensure that the other
19105 elements are valid floats -- zero is the easiest such value. */
19106 if (MEM_P (input))
19108 if (vecmode == V4SFmode)
19109 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19110 else
19111 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19113 else
19115 input = gen_rtx_REG (vecmode, REGNO (input));
19116 emit_move_insn (value, CONST0_RTX (vecmode));
19117 if (vecmode == V4SFmode)
19118 emit_insn (gen_sse_movss (value, value, input));
19119 else
19120 emit_insn (gen_sse2_movsd (value, value, input));
19123 emit_move_insn (large, two31);
19124 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19126 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19127 emit_insn (gen_rtx_SET (large, x));
19129 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19130 emit_insn (gen_rtx_SET (zero_or_two31, x));
19132 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19133 emit_insn (gen_rtx_SET (value, x));
19135 large = gen_rtx_REG (V4SImode, REGNO (large));
19136 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19138 x = gen_rtx_REG (V4SImode, REGNO (value));
19139 if (vecmode == V4SFmode)
19140 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19141 else
19142 emit_insn (gen_sse2_cvttpd2dq (x, value));
19143 value = x;
19145 emit_insn (gen_xorv4si3 (value, value, large));
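/* The trick above, in scalar terms: if the input f is >= 0x1.0p31 we
   subtract 0x1.0p31 before the signed truncation and then flip bit 31 of
   the integer result with the final XOR; if f < 0x1.0p31 nothing is
   subtracted and the XOR mask is zero.  For example f == 3e9 yields
   (int) (3e9 - 2147483648.0) ^ 0x80000000 == 3000000000.  */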
19148 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19149 Expects the 64-bit DImode to be supplied in a pair of integral
19150 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19151 -mfpmath=sse, !optimize_size only. */
19153 void
19154 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19156 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19157 rtx int_xmm, fp_xmm;
19158 rtx biases, exponents;
19159 rtx x;
19161 int_xmm = gen_reg_rtx (V4SImode);
19162 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19163 emit_insn (gen_movdi_to_sse (int_xmm, input));
19164 else if (TARGET_SSE_SPLIT_REGS)
19166 emit_clobber (int_xmm);
19167 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19169 else
19171 x = gen_reg_rtx (V2DImode);
19172 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19173 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19176 x = gen_rtx_CONST_VECTOR (V4SImode,
19177 gen_rtvec (4, GEN_INT (0x43300000UL),
19178 GEN_INT (0x45300000UL),
19179 const0_rtx, const0_rtx));
19180 exponents = validize_mem (force_const_mem (V4SImode, x));
19182 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19183 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19185 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19186 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19187 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19188 (0x1.0p84 + double(fp_value_hi_xmm)).
19189 Note these exponents differ by 32. */
19191 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19193 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19194 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19195 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19196 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19197 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19198 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19199 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19200 biases = validize_mem (force_const_mem (V2DFmode, biases));
19201 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19203 /* Add the upper and lower DFmode values together. */
19204 if (TARGET_SSE3)
19205 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19206 else
19208 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19209 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19210 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19213 ix86_expand_vector_extract (false, target, fp_xmm, 0);
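/* In scalar terms the bias trick computes, for input x = hi * 2^32 + lo
   (hi and lo the unsigned 32-bit halves):
     ((0x1.0p52 + (double) lo) - 0x1.0p52)
   + ((0x1.0p84 + (double) hi * 0x1.0p32) - 0x1.0p84)
   where both bias subtractions are exact, so after the final add the
   result equals (double) x up to the usual DFmode rounding of the sum.  */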
19216 /* Not used, but eases macroization of patterns. */
19217 void
19218 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19220 gcc_unreachable ();
19223 /* Convert an unsigned SImode value into a DFmode. Only currently used
19224 for SSE, but applicable anywhere. */
19226 void
19227 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19229 REAL_VALUE_TYPE TWO31r;
19230 rtx x, fp;
19232 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19233 NULL, 1, OPTAB_DIRECT);
19235 fp = gen_reg_rtx (DFmode);
19236 emit_insn (gen_floatsidf2 (fp, x));
19238 real_ldexp (&TWO31r, &dconst1, 31);
19239 x = const_double_from_real_value (TWO31r, DFmode);
19241 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19242 if (x != target)
19243 emit_move_insn (target, x);
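/* Numerically: the PLUS with -2147483648 flips the sign bit, so the
   signed conversion sees input - 2^31, and adding 0x1.0p31 back restores
   the unsigned value.  E.g. input == 0xffffffff becomes 2147483647, is
   converted to 2147483647.0 and ends up as 4294967295.0.  */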
19246 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19247 32-bit mode; otherwise we have a direct convert instruction. */
19249 void
19250 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19252 REAL_VALUE_TYPE TWO32r;
19253 rtx fp_lo, fp_hi, x;
19255 fp_lo = gen_reg_rtx (DFmode);
19256 fp_hi = gen_reg_rtx (DFmode);
19258 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19260 real_ldexp (&TWO32r, &dconst1, 32);
19261 x = const_double_from_real_value (TWO32r, DFmode);
19262 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19264 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19266 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19267 0, OPTAB_DIRECT);
19268 if (x != target)
19269 emit_move_insn (target, x);
19272 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19273 For x86_32, -mfpmath=sse, !optimize_size only. */
19274 void
19275 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19277 REAL_VALUE_TYPE ONE16r;
19278 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19280 real_ldexp (&ONE16r, &dconst1, 16);
19281 x = const_double_from_real_value (ONE16r, SFmode);
19282 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19283 NULL, 0, OPTAB_DIRECT);
19284 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19285 NULL, 0, OPTAB_DIRECT);
19286 fp_hi = gen_reg_rtx (SFmode);
19287 fp_lo = gen_reg_rtx (SFmode);
19288 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19289 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19290 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19291 0, OPTAB_DIRECT);
19292 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19293 0, OPTAB_DIRECT);
19294 if (!rtx_equal_p (target, fp_hi))
19295 emit_move_insn (target, fp_hi);
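/* In other words: split the unsigned input into 16-bit halves, convert
   each half exactly to SFmode, and recombine as hi * 0x1p16 + lo.  For a
   hypothetical input 0x12345678 this computes 4660.0f * 65536.0f +
   22136.0f.  */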
19298 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19299 a vector of unsigned ints VAL to vector of floats TARGET. */
19301 void
19302 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19304 rtx tmp[8];
19305 REAL_VALUE_TYPE TWO16r;
19306 machine_mode intmode = GET_MODE (val);
19307 machine_mode fltmode = GET_MODE (target);
19308 rtx (*cvt) (rtx, rtx);
19310 if (intmode == V4SImode)
19311 cvt = gen_floatv4siv4sf2;
19312 else
19313 cvt = gen_floatv8siv8sf2;
19314 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19315 tmp[0] = force_reg (intmode, tmp[0]);
19316 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19317 OPTAB_DIRECT);
19318 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19319 NULL_RTX, 1, OPTAB_DIRECT);
19320 tmp[3] = gen_reg_rtx (fltmode);
19321 emit_insn (cvt (tmp[3], tmp[1]));
19322 tmp[4] = gen_reg_rtx (fltmode);
19323 emit_insn (cvt (tmp[4], tmp[2]));
19324 real_ldexp (&TWO16r, &dconst1, 16);
19325 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19326 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19327 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19328 OPTAB_DIRECT);
19329 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19330 OPTAB_DIRECT);
19331 if (tmp[7] != target)
19332 emit_move_insn (target, tmp[7]);
19335 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19336 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19337 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19338 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19341 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19343 REAL_VALUE_TYPE TWO31r;
19344 rtx two31r, tmp[4];
19345 machine_mode mode = GET_MODE (val);
19346 machine_mode scalarmode = GET_MODE_INNER (mode);
19347 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19348 rtx (*cmp) (rtx, rtx, rtx, rtx);
19349 int i;
19351 for (i = 0; i < 3; i++)
19352 tmp[i] = gen_reg_rtx (mode);
19353 real_ldexp (&TWO31r, &dconst1, 31);
19354 two31r = const_double_from_real_value (TWO31r, scalarmode);
19355 two31r = ix86_build_const_vector (mode, 1, two31r);
19356 two31r = force_reg (mode, two31r);
19357 switch (mode)
19359 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19360 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19361 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19362 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19363 default: gcc_unreachable ();
19365 tmp[3] = gen_rtx_LE (mode, two31r, val);
19366 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19367 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19368 0, OPTAB_DIRECT);
19369 if (intmode == V4SImode || TARGET_AVX2)
19370 *xorp = expand_simple_binop (intmode, ASHIFT,
19371 gen_lowpart (intmode, tmp[0]),
19372 GEN_INT (31), NULL_RTX, 0,
19373 OPTAB_DIRECT);
19374 else
19376 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19377 two31 = ix86_build_const_vector (intmode, 1, two31);
19378 *xorp = expand_simple_binop (intmode, AND,
19379 gen_lowpart (intmode, tmp[0]),
19380 two31, NULL_RTX, 0,
19381 OPTAB_DIRECT);
19383 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19384 0, OPTAB_DIRECT);
19387 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19388 then replicate the value for all elements of the vector
19389 register. */
19392 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19394 int i, n_elt;
19395 rtvec v;
19396 machine_mode scalar_mode;
19398 switch (mode)
19400 case V64QImode:
19401 case V32QImode:
19402 case V16QImode:
19403 case V32HImode:
19404 case V16HImode:
19405 case V8HImode:
19406 case V16SImode:
19407 case V8SImode:
19408 case V4SImode:
19409 case V8DImode:
19410 case V4DImode:
19411 case V2DImode:
19412 gcc_assert (vect);
19413 case V16SFmode:
19414 case V8SFmode:
19415 case V4SFmode:
19416 case V8DFmode:
19417 case V4DFmode:
19418 case V2DFmode:
19419 n_elt = GET_MODE_NUNITS (mode);
19420 v = rtvec_alloc (n_elt);
19421 scalar_mode = GET_MODE_INNER (mode);
19423 RTVEC_ELT (v, 0) = value;
19425 for (i = 1; i < n_elt; ++i)
19426 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19428 return gen_rtx_CONST_VECTOR (mode, v);
19430 default:
19431 gcc_unreachable ();
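/* E.g. ix86_build_const_vector (V4SFmode, true, x) yields
   (const_vector:V4SF [x x x x]), while with VECT false only element 0
   is X and the remaining elements are CONST0_RTX (SFmode).  */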
19435 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19436 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19437 for an SSE register. If VECT is true, then replicate the mask for
19438 all elements of the vector register. If INVERT is true, then create
19439 a mask excluding the sign bit. */
19442 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19444 machine_mode vec_mode, imode;
19445 wide_int w;
19446 rtx mask, v;
19448 switch (mode)
19450 case V16SImode:
19451 case V16SFmode:
19452 case V8SImode:
19453 case V4SImode:
19454 case V8SFmode:
19455 case V4SFmode:
19456 vec_mode = mode;
19457 mode = GET_MODE_INNER (mode);
19458 imode = SImode;
19459 break;
19461 case V8DImode:
19462 case V4DImode:
19463 case V2DImode:
19464 case V8DFmode:
19465 case V4DFmode:
19466 case V2DFmode:
19467 vec_mode = mode;
19468 mode = GET_MODE_INNER (mode);
19469 imode = DImode;
19470 break;
19472 case TImode:
19473 case TFmode:
19474 vec_mode = VOIDmode;
19475 imode = TImode;
19476 break;
19478 default:
19479 gcc_unreachable ();
19482 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19483 GET_MODE_BITSIZE (mode));
19484 if (invert)
19485 w = wi::bit_not (w);
19487 /* Force this value into the low part of a fp vector constant. */
19488 mask = immed_wide_int_const (w, imode);
19489 mask = gen_lowpart (mode, mask);
19491 if (vec_mode == VOIDmode)
19492 return force_reg (mode, mask);
19494 v = ix86_build_const_vector (vec_mode, vect, mask);
19495 return force_reg (vec_mode, v);
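/* Concretely, for V4SFmode this builds a vector whose elements have the
   bit pattern 0x80000000 (just the sign bit), or 0x7fffffff when INVERT
   is true; for V2DFmode the per-element patterns are 0x8000000000000000
   and 0x7fffffffffffffff respectively.  */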
19498 /* Generate code for floating point ABS or NEG. */
19500 void
19501 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19502 rtx operands[])
19504 rtx mask, set, dst, src;
19505 bool use_sse = false;
19506 bool vector_mode = VECTOR_MODE_P (mode);
19507 machine_mode vmode = mode;
19509 if (vector_mode)
19510 use_sse = true;
19511 else if (mode == TFmode)
19512 use_sse = true;
19513 else if (TARGET_SSE_MATH)
19515 use_sse = SSE_FLOAT_MODE_P (mode);
19516 if (mode == SFmode)
19517 vmode = V4SFmode;
19518 else if (mode == DFmode)
19519 vmode = V2DFmode;
19522 /* NEG and ABS performed with SSE use bitwise mask operations.
19523 Create the appropriate mask now. */
19524 if (use_sse)
19525 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19526 else
19527 mask = NULL_RTX;
19529 dst = operands[0];
19530 src = operands[1];
19532 set = gen_rtx_fmt_e (code, mode, src);
19533 set = gen_rtx_SET (dst, set);
19535 if (mask)
19537 rtx use, clob;
19538 rtvec par;
19540 use = gen_rtx_USE (VOIDmode, mask);
19541 if (vector_mode)
19542 par = gen_rtvec (2, set, use);
19543 else
19545 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19546 par = gen_rtvec (3, set, use, clob);
19548 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19550 else
19551 emit_insn (set);
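/* The mask built above is meant to be consumed when the absneg pattern
   is later split: conceptually NEG is an XOR with the sign-bit mask and
   ABS is an AND with the inverted mask, e.g. for SFmode elements XOR
   with 0x80000000 versus AND with 0x7fffffff.  */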
19554 /* Expand a copysign operation. Special case operand 0 being a constant. */
19556 void
19557 ix86_expand_copysign (rtx operands[])
19559 machine_mode mode, vmode;
19560 rtx dest, op0, op1, mask, nmask;
19562 dest = operands[0];
19563 op0 = operands[1];
19564 op1 = operands[2];
19566 mode = GET_MODE (dest);
19568 if (mode == SFmode)
19569 vmode = V4SFmode;
19570 else if (mode == DFmode)
19571 vmode = V2DFmode;
19572 else
19573 vmode = mode;
19575 if (CONST_DOUBLE_P (op0))
19577 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19579 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19580 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19582 if (mode == SFmode || mode == DFmode)
19584 if (op0 == CONST0_RTX (mode))
19585 op0 = CONST0_RTX (vmode);
19586 else
19588 rtx v = ix86_build_const_vector (vmode, false, op0);
19590 op0 = force_reg (vmode, v);
19593 else if (op0 != CONST0_RTX (mode))
19594 op0 = force_reg (mode, op0);
19596 mask = ix86_build_signbit_mask (vmode, 0, 0);
19598 if (mode == SFmode)
19599 copysign_insn = gen_copysignsf3_const;
19600 else if (mode == DFmode)
19601 copysign_insn = gen_copysigndf3_const;
19602 else
19603 copysign_insn = gen_copysigntf3_const;
19605 emit_insn (copysign_insn (dest, op0, op1, mask));
19607 else
19609 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19611 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19612 mask = ix86_build_signbit_mask (vmode, 0, 0);
19614 if (mode == SFmode)
19615 copysign_insn = gen_copysignsf3_var;
19616 else if (mode == DFmode)
19617 copysign_insn = gen_copysigndf3_var;
19618 else
19619 copysign_insn = gen_copysigntf3_var;
19621 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19625 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19626 be a constant, and so has already been expanded into a vector constant. */
19628 void
19629 ix86_split_copysign_const (rtx operands[])
19631 machine_mode mode, vmode;
19632 rtx dest, op0, mask, x;
19634 dest = operands[0];
19635 op0 = operands[1];
19636 mask = operands[3];
19638 mode = GET_MODE (dest);
19639 vmode = GET_MODE (mask);
19641 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19642 x = gen_rtx_AND (vmode, dest, mask);
19643 emit_insn (gen_rtx_SET (dest, x));
19645 if (op0 != CONST0_RTX (vmode))
19647 x = gen_rtx_IOR (vmode, dest, op0);
19648 emit_insn (gen_rtx_SET (dest, x));
19652 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19653 so we have to do two masks. */
19655 void
19656 ix86_split_copysign_var (rtx operands[])
19658 machine_mode mode, vmode;
19659 rtx dest, scratch, op0, op1, mask, nmask, x;
19661 dest = operands[0];
19662 scratch = operands[1];
19663 op0 = operands[2];
19664 op1 = operands[3];
19665 nmask = operands[4];
19666 mask = operands[5];
19668 mode = GET_MODE (dest);
19669 vmode = GET_MODE (mask);
19671 if (rtx_equal_p (op0, op1))
19673 /* Shouldn't happen often (it's useless, obviously), but when it does
19674 we'd generate incorrect code if we continue below. */
19675 emit_move_insn (dest, op0);
19676 return;
19679 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19681 gcc_assert (REGNO (op1) == REGNO (scratch));
19683 x = gen_rtx_AND (vmode, scratch, mask);
19684 emit_insn (gen_rtx_SET (scratch, x));
19686 dest = mask;
19687 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19688 x = gen_rtx_NOT (vmode, dest);
19689 x = gen_rtx_AND (vmode, x, op0);
19690 emit_insn (gen_rtx_SET (dest, x));
19692 else
19694 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19696 x = gen_rtx_AND (vmode, scratch, mask);
19698 else /* alternative 2,4 */
19700 gcc_assert (REGNO (mask) == REGNO (scratch));
19701 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19702 x = gen_rtx_AND (vmode, scratch, op1);
19704 emit_insn (gen_rtx_SET (scratch, x));
19706 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19708 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19709 x = gen_rtx_AND (vmode, dest, nmask);
19711 else /* alternative 3,4 */
19713 gcc_assert (REGNO (nmask) == REGNO (dest));
19714 dest = nmask;
19715 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19716 x = gen_rtx_AND (vmode, dest, op0);
19718 emit_insn (gen_rtx_SET (dest, x));
19721 x = gen_rtx_IOR (vmode, dest, scratch);
19722 emit_insn (gen_rtx_SET (dest, x));
19725 /* Return TRUE or FALSE depending on whether the first SET in INSN
19726 has source and destination with matching CC modes, and that the
19727 CC mode is at least as constrained as REQ_MODE. */
19729 bool
19730 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19732 rtx set;
19733 machine_mode set_mode;
19735 set = PATTERN (insn);
19736 if (GET_CODE (set) == PARALLEL)
19737 set = XVECEXP (set, 0, 0);
19738 gcc_assert (GET_CODE (set) == SET);
19739 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19741 set_mode = GET_MODE (SET_DEST (set));
19742 switch (set_mode)
19744 case CCNOmode:
19745 if (req_mode != CCNOmode
19746 && (req_mode != CCmode
19747 || XEXP (SET_SRC (set), 1) != const0_rtx))
19748 return false;
19749 break;
19750 case CCmode:
19751 if (req_mode == CCGCmode)
19752 return false;
19753 /* FALLTHRU */
19754 case CCGCmode:
19755 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19756 return false;
19757 /* FALLTHRU */
19758 case CCGOCmode:
19759 if (req_mode == CCZmode)
19760 return false;
19761 /* FALLTHRU */
19762 case CCZmode:
19763 break;
19765 case CCAmode:
19766 case CCCmode:
19767 case CCOmode:
19768 case CCSmode:
19769 if (set_mode != req_mode)
19770 return false;
19771 break;
19773 default:
19774 gcc_unreachable ();
19777 return GET_MODE (SET_SRC (set)) == set_mode;
19780 /* Generate insn patterns to do an integer compare of OPERANDS. */
19782 static rtx
19783 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19785 machine_mode cmpmode;
19786 rtx tmp, flags;
19788 cmpmode = SELECT_CC_MODE (code, op0, op1);
19789 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19791 /* This is very simple, but making the interface the same as in the
19792 FP case makes the rest of the code easier. */
19793 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19794 emit_insn (gen_rtx_SET (flags, tmp));
19796 /* Return the test that should be put into the flags user, i.e.
19797 the bcc, scc, or cmov instruction. */
19798 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19801 /* Figure out whether to use ordered or unordered fp comparisons.
19802 Return the appropriate mode to use. */
19804 machine_mode
19805 ix86_fp_compare_mode (enum rtx_code)
19807 /* ??? In order to make all comparisons reversible, we do all comparisons
19808 non-trapping when compiling for IEEE.  Once gcc is able to distinguish
19809 between trapping and non-trapping forms of comparisons, we can make
19810 inequality comparisons trapping again, since that results in better code
19811 when using FCOM based compares.  */
19812 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19815 machine_mode
19816 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19818 machine_mode mode = GET_MODE (op0);
19820 if (SCALAR_FLOAT_MODE_P (mode))
19822 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19823 return ix86_fp_compare_mode (code);
19826 switch (code)
19828 /* Only zero flag is needed. */
19829 case EQ: /* ZF=0 */
19830 case NE: /* ZF!=0 */
19831 return CCZmode;
19832 /* Codes needing carry flag. */
19833 case GEU: /* CF=0 */
19834 case LTU: /* CF=1 */
19835 /* Detect overflow checks. They need just the carry flag. */
19836 if (GET_CODE (op0) == PLUS
19837 && rtx_equal_p (op1, XEXP (op0, 0)))
19838 return CCCmode;
19839 else
19840 return CCmode;
19841 case GTU: /* CF=0 & ZF=0 */
19842 case LEU: /* CF=1 | ZF=1 */
19843 return CCmode;
19844 /* Codes possibly doable only with sign flag when
19845 comparing against zero. */
19846 case GE: /* SF=OF or SF=0 */
19847 case LT: /* SF<>OF or SF=1 */
19848 if (op1 == const0_rtx)
19849 return CCGOCmode;
19850 else
19851 /* For other cases Carry flag is not required. */
19852 return CCGCmode;
19853 /* Codes doable only with the sign flag when comparing
19854 against zero, but for which we lack a jump instruction,
19855 so we need to use relational tests against overflow,
19856 which thus needs to be zero.  */
19857 case GT: /* ZF=0 & SF=OF */
19858 case LE: /* ZF=1 | SF<>OF */
19859 if (op1 == const0_rtx)
19860 return CCNOmode;
19861 else
19862 return CCGCmode;
19863 /* The strcmp pattern does (use flags), and combine may ask us for the
19864 proper mode.  */
19865 case USE:
19866 return CCmode;
19867 default:
19868 gcc_unreachable ();
19872 /* Return the fixed registers used for condition codes. */
19874 static bool
19875 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19877 *p1 = FLAGS_REG;
19878 *p2 = FPSR_REG;
19879 return true;
19882 /* If two condition code modes are compatible, return a condition code
19883 mode which is compatible with both. Otherwise, return
19884 VOIDmode. */
19886 static machine_mode
19887 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19889 if (m1 == m2)
19890 return m1;
19892 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19893 return VOIDmode;
19895 if ((m1 == CCGCmode && m2 == CCGOCmode)
19896 || (m1 == CCGOCmode && m2 == CCGCmode))
19897 return CCGCmode;
19899 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19900 return m2;
19901 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19902 return m1;
19904 switch (m1)
19906 default:
19907 gcc_unreachable ();
19909 case CCmode:
19910 case CCGCmode:
19911 case CCGOCmode:
19912 case CCNOmode:
19913 case CCAmode:
19914 case CCCmode:
19915 case CCOmode:
19916 case CCSmode:
19917 case CCZmode:
19918 switch (m2)
19920 default:
19921 return VOIDmode;
19923 case CCmode:
19924 case CCGCmode:
19925 case CCGOCmode:
19926 case CCNOmode:
19927 case CCAmode:
19928 case CCCmode:
19929 case CCOmode:
19930 case CCSmode:
19931 case CCZmode:
19932 return CCmode;
19935 case CCFPmode:
19936 case CCFPUmode:
19937 /* These are only compatible with themselves, which we already
19938 checked above. */
19939 return VOIDmode;
19944 /* Return a comparison we can do that is equivalent to
19945 swap_condition (code), apart possibly from orderedness.
19946 But never change orderedness if TARGET_IEEE_FP, returning
19947 UNKNOWN in that case if necessary.  */
19949 static enum rtx_code
19950 ix86_fp_swap_condition (enum rtx_code code)
19952 switch (code)
19954 case GT: /* GTU - CF=0 & ZF=0 */
19955 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19956 case GE: /* GEU - CF=0 */
19957 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19958 case UNLT: /* LTU - CF=1 */
19959 return TARGET_IEEE_FP ? UNKNOWN : GT;
19960 case UNLE: /* LEU - CF=1 | ZF=1 */
19961 return TARGET_IEEE_FP ? UNKNOWN : GE;
19962 default:
19963 return swap_condition (code);
19967 /* Return the cost of comparison CODE using the best strategy for performance.
19968 All the following functions use the number of instructions as the cost metric.
19969 In the future this should be tweaked to compute bytes for optimize_size and to
19970 take into account the performance of various instructions on various CPUs.  */
19972 static int
19973 ix86_fp_comparison_cost (enum rtx_code code)
19975 int arith_cost;
19977 /* The cost of code using bit-twiddling on %ah. */
19978 switch (code)
19980 case UNLE:
19981 case UNLT:
19982 case LTGT:
19983 case GT:
19984 case GE:
19985 case UNORDERED:
19986 case ORDERED:
19987 case UNEQ:
19988 arith_cost = 4;
19989 break;
19990 case LT:
19991 case NE:
19992 case EQ:
19993 case UNGE:
19994 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19995 break;
19996 case LE:
19997 case UNGT:
19998 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19999 break;
20000 default:
20001 gcc_unreachable ();
20004 switch (ix86_fp_comparison_strategy (code))
20006 case IX86_FPCMP_COMI:
20007 return arith_cost > 4 ? 3 : 2;
20008 case IX86_FPCMP_SAHF:
20009 return arith_cost > 4 ? 4 : 3;
20010 default:
20011 return arith_cost;
20015 /* Return the strategy to use for floating-point.  We assume that fcomi is
20016 always preferable where available, since that is also true when looking at
20017 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
20019 enum ix86_fpcmp_strategy
20020 ix86_fp_comparison_strategy (enum rtx_code)
20022 /* Do fcomi/sahf based test when profitable. */
20024 if (TARGET_CMOVE)
20025 return IX86_FPCMP_COMI;
20027 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20028 return IX86_FPCMP_SAHF;
20030 return IX86_FPCMP_ARITH;
20033 /* Swap, force into registers, or otherwise massage the two operands
20034 to a fp comparison. The operands are updated in place; the new
20035 comparison code is returned. */
20037 static enum rtx_code
20038 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20040 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20041 rtx op0 = *pop0, op1 = *pop1;
20042 machine_mode op_mode = GET_MODE (op0);
20043 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20045 /* All of the unordered compare instructions only work on registers.
20046 The same is true of the fcomi compare instructions. The XFmode
20047 compare instructions require registers except when comparing
20048 against zero or when converting operand 1 from fixed point to
20049 floating point. */
20051 if (!is_sse
20052 && (fpcmp_mode == CCFPUmode
20053 || (op_mode == XFmode
20054 && ! (standard_80387_constant_p (op0) == 1
20055 || standard_80387_constant_p (op1) == 1)
20056 && GET_CODE (op1) != FLOAT)
20057 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20059 op0 = force_reg (op_mode, op0);
20060 op1 = force_reg (op_mode, op1);
20062 else
20064 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20065 things around if they appear profitable, otherwise force op0
20066 into a register. */
20068 if (standard_80387_constant_p (op0) == 0
20069 || (MEM_P (op0)
20070 && ! (standard_80387_constant_p (op1) == 0
20071 || MEM_P (op1))))
20073 enum rtx_code new_code = ix86_fp_swap_condition (code);
20074 if (new_code != UNKNOWN)
20076 std::swap (op0, op1);
20077 code = new_code;
20081 if (!REG_P (op0))
20082 op0 = force_reg (op_mode, op0);
20084 if (CONSTANT_P (op1))
20086 int tmp = standard_80387_constant_p (op1);
20087 if (tmp == 0)
20088 op1 = validize_mem (force_const_mem (op_mode, op1));
20089 else if (tmp == 1)
20091 if (TARGET_CMOVE)
20092 op1 = force_reg (op_mode, op1);
20094 else
20095 op1 = force_reg (op_mode, op1);
20099 /* Try to rearrange the comparison to make it cheaper. */
20100 if (ix86_fp_comparison_cost (code)
20101 > ix86_fp_comparison_cost (swap_condition (code))
20102 && (REG_P (op1) || can_create_pseudo_p ()))
20104 std::swap (op0, op1);
20105 code = swap_condition (code);
20106 if (!REG_P (op0))
20107 op0 = force_reg (op_mode, op0);
20110 *pop0 = op0;
20111 *pop1 = op1;
20112 return code;
20115 /* Convert the comparison codes we use to represent FP comparisons to the
20116 integer code that will result in a proper branch.  Return UNKNOWN if no
20117 such code is available.  */
20119 enum rtx_code
20120 ix86_fp_compare_code_to_integer (enum rtx_code code)
20122 switch (code)
20124 case GT:
20125 return GTU;
20126 case GE:
20127 return GEU;
20128 case ORDERED:
20129 case UNORDERED:
20130 return code;
20131 break;
20132 case UNEQ:
20133 return EQ;
20134 break;
20135 case UNLT:
20136 return LTU;
20137 break;
20138 case UNLE:
20139 return LEU;
20140 break;
20141 case LTGT:
20142 return NE;
20143 break;
20144 default:
20145 return UNKNOWN;
20149 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20151 static rtx
20152 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20154 machine_mode fpcmp_mode, intcmp_mode;
20155 rtx tmp, tmp2;
20157 fpcmp_mode = ix86_fp_compare_mode (code);
20158 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20160 /* Do fcomi/sahf based test when profitable. */
20161 switch (ix86_fp_comparison_strategy (code))
20163 case IX86_FPCMP_COMI:
20164 intcmp_mode = fpcmp_mode;
20165 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20166 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20167 emit_insn (tmp);
20168 break;
20170 case IX86_FPCMP_SAHF:
20171 intcmp_mode = fpcmp_mode;
20172 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20173 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20175 if (!scratch)
20176 scratch = gen_reg_rtx (HImode);
20177 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20178 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20179 break;
20181 case IX86_FPCMP_ARITH:
20182 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20183 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20184 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20185 if (!scratch)
20186 scratch = gen_reg_rtx (HImode);
20187 emit_insn (gen_rtx_SET (scratch, tmp2));
20189 /* In the unordered case, we have to check C2 for NaN's, which
20190 doesn't happen to work out to anything nice combination-wise.
20191 So do some bit twiddling on the value we've got in AH to come
20192 up with an appropriate set of condition codes. */
20194 intcmp_mode = CCNOmode;
20195 switch (code)
20197 case GT:
20198 case UNGT:
20199 if (code == GT || !TARGET_IEEE_FP)
20201 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20202 code = EQ;
20204 else
20206 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20207 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20208 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20209 intcmp_mode = CCmode;
20210 code = GEU;
20212 break;
20213 case LT:
20214 case UNLT:
20215 if (code == LT && TARGET_IEEE_FP)
20217 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20218 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20219 intcmp_mode = CCmode;
20220 code = EQ;
20222 else
20224 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20225 code = NE;
20227 break;
20228 case GE:
20229 case UNGE:
20230 if (code == GE || !TARGET_IEEE_FP)
20232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20233 code = EQ;
20235 else
20237 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20238 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20239 code = NE;
20241 break;
20242 case LE:
20243 case UNLE:
20244 if (code == LE && TARGET_IEEE_FP)
20246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20249 intcmp_mode = CCmode;
20250 code = LTU;
20252 else
20254 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20255 code = NE;
20257 break;
20258 case EQ:
20259 case UNEQ:
20260 if (code == EQ && TARGET_IEEE_FP)
20262 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20263 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20264 intcmp_mode = CCmode;
20265 code = EQ;
20267 else
20269 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20270 code = NE;
20272 break;
20273 case NE:
20274 case LTGT:
20275 if (code == NE && TARGET_IEEE_FP)
20277 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20278 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20279 GEN_INT (0x40)));
20280 code = NE;
20282 else
20284 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20285 code = EQ;
20287 break;
20289 case UNORDERED:
20290 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20291 code = NE;
20292 break;
20293 case ORDERED:
20294 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20295 code = EQ;
20296 break;
20298 default:
20299 gcc_unreachable ();
20301 break;
20303 default:
20304 gcc_unreachable ();
20307 /* Return the test that should be put into the flags user, i.e.
20308 the bcc, scc, or cmov instruction. */
20309 return gen_rtx_fmt_ee (code, VOIDmode,
20310 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20311 const0_rtx);
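/* For reference, the magic constants above test the x87 status-word
   condition codes once the status word's high byte is examined (the AH
   byte referred to above): bit 0 (0x01) is C0, bit 2 (0x04) is C2 (set
   for unordered/NaN) and bit 6 (0x40) is C3, so 0x45 masks C0|C2|C3 and
   0x05 masks C0|C2.  */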
20314 static rtx
20315 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20317 rtx ret;
20319 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20320 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20322 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20324 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20325 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20327 else
20328 ret = ix86_expand_int_compare (code, op0, op1);
20330 return ret;
20333 void
20334 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20336 machine_mode mode = GET_MODE (op0);
20337 rtx tmp;
20339 switch (mode)
20341 case SFmode:
20342 case DFmode:
20343 case XFmode:
20344 case QImode:
20345 case HImode:
20346 case SImode:
20347 simple:
20348 tmp = ix86_expand_compare (code, op0, op1);
20349 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20350 gen_rtx_LABEL_REF (VOIDmode, label),
20351 pc_rtx);
20352 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20353 return;
20355 case DImode:
20356 if (TARGET_64BIT)
20357 goto simple;
20358 case TImode:
20359 /* Expand DImode branch into multiple compare+branch. */
20361 rtx lo[2], hi[2];
20362 rtx_code_label *label2;
20363 enum rtx_code code1, code2, code3;
20364 machine_mode submode;
20366 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20368 std::swap (op0, op1);
20369 code = swap_condition (code);
20372 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20373 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20375 submode = mode == DImode ? SImode : DImode;
20377 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20378 avoid two branches. This costs one extra insn, so disable when
20379 optimizing for size. */
20381 if ((code == EQ || code == NE)
20382 && (!optimize_insn_for_size_p ()
20383 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20385 rtx xor0, xor1;
20387 xor1 = hi[0];
20388 if (hi[1] != const0_rtx)
20389 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20390 NULL_RTX, 0, OPTAB_WIDEN);
20392 xor0 = lo[0];
20393 if (lo[1] != const0_rtx)
20394 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20395 NULL_RTX, 0, OPTAB_WIDEN);
20397 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20398 NULL_RTX, 0, OPTAB_WIDEN);
20400 ix86_expand_branch (code, tmp, const0_rtx, label);
20401 return;
20404 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20405 op1 is a constant and the low word is zero, then we can just
20406 examine the high word. Similarly for low word -1 and
20407 less-or-equal-than or greater-than. */
20409 if (CONST_INT_P (hi[1]))
20410 switch (code)
20412 case LT: case LTU: case GE: case GEU:
20413 if (lo[1] == const0_rtx)
20415 ix86_expand_branch (code, hi[0], hi[1], label);
20416 return;
20418 break;
20419 case LE: case LEU: case GT: case GTU:
20420 if (lo[1] == constm1_rtx)
20422 ix86_expand_branch (code, hi[0], hi[1], label);
20423 return;
20425 break;
20426 default:
20427 break;
20430 /* Otherwise, we need two or three jumps. */
20432 label2 = gen_label_rtx ();
20434 code1 = code;
20435 code2 = swap_condition (code);
20436 code3 = unsigned_condition (code);
20438 switch (code)
20440 case LT: case GT: case LTU: case GTU:
20441 break;
20443 case LE: code1 = LT; code2 = GT; break;
20444 case GE: code1 = GT; code2 = LT; break;
20445 case LEU: code1 = LTU; code2 = GTU; break;
20446 case GEU: code1 = GTU; code2 = LTU; break;
20448 case EQ: code1 = UNKNOWN; code2 = NE; break;
20449 case NE: code2 = UNKNOWN; break;
20451 default:
20452 gcc_unreachable ();
20456 * a < b =>
20457 * if (hi(a) < hi(b)) goto true;
20458 * if (hi(a) > hi(b)) goto false;
20459 * if (lo(a) < lo(b)) goto true;
20460 * false:
20463 if (code1 != UNKNOWN)
20464 ix86_expand_branch (code1, hi[0], hi[1], label);
20465 if (code2 != UNKNOWN)
20466 ix86_expand_branch (code2, hi[0], hi[1], label2);
20468 ix86_expand_branch (code3, lo[0], lo[1], label);
20470 if (code2 != UNKNOWN)
20471 emit_label (label2);
20472 return;
20475 default:
20476 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20477 goto simple;
20481 /* Split branch based on floating point condition. */
20482 void
20483 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20484 rtx target1, rtx target2, rtx tmp)
20486 rtx condition;
20487 rtx i;
20489 if (target2 != pc_rtx)
20491 std::swap (target1, target2);
20492 code = reverse_condition_maybe_unordered (code);
20495 condition = ix86_expand_fp_compare (code, op1, op2,
20496 tmp);
20498 i = emit_jump_insn (gen_rtx_SET
20499 (pc_rtx,
20500 gen_rtx_IF_THEN_ELSE (VOIDmode,
20501 condition, target1, target2)));
20502 if (split_branch_probability >= 0)
20503 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20506 void
20507 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20509 rtx ret;
20511 gcc_assert (GET_MODE (dest) == QImode);
20513 ret = ix86_expand_compare (code, op0, op1);
20514 PUT_MODE (ret, QImode);
20515 emit_insn (gen_rtx_SET (dest, ret));
20518 /* Expand comparison setting or clearing carry flag. Return true when
20519 successful and set pop for the operation. */
20520 static bool
20521 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20523 machine_mode mode =
20524 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20526 /* Do not handle double-mode compares that go through special path. */
20527 if (mode == (TARGET_64BIT ? TImode : DImode))
20528 return false;
20530 if (SCALAR_FLOAT_MODE_P (mode))
20532 rtx compare_op;
20533 rtx_insn *compare_seq;
20535 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20537 /* Shortcut: the following common codes never translate
20538 into carry-flag compares.  */
20539 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20540 || code == ORDERED || code == UNORDERED)
20541 return false;
20543 /* These comparisons require the zero flag; swap operands so they no longer do.  */
20544 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20545 && !TARGET_IEEE_FP)
20547 std::swap (op0, op1);
20548 code = swap_condition (code);
20551 /* Try to expand the comparison and verify that we end up with
20552 a carry-flag-based comparison.  This fails only when we decide
20553 to expand the comparison using arithmetic, which is not a
20554 common scenario.  */
20555 start_sequence ();
20556 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20557 compare_seq = get_insns ();
20558 end_sequence ();
20560 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20561 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20562 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20563 else
20564 code = GET_CODE (compare_op);
20566 if (code != LTU && code != GEU)
20567 return false;
20569 emit_insn (compare_seq);
20570 *pop = compare_op;
20571 return true;
20574 if (!INTEGRAL_MODE_P (mode))
20575 return false;
20577 switch (code)
20579 case LTU:
20580 case GEU:
20581 break;
20583 /* Convert a==0 into (unsigned)a<1. */
20584 case EQ:
20585 case NE:
20586 if (op1 != const0_rtx)
20587 return false;
20588 op1 = const1_rtx;
20589 code = (code == EQ ? LTU : GEU);
20590 break;
20592 /* Convert a>b into b<a or a>=b+1.  */
20593 case GTU:
20594 case LEU:
20595 if (CONST_INT_P (op1))
20597 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20598 /* Bail out on overflow.  We could still swap operands, but that
20599 would force loading the constant into a register.  */
20600 if (op1 == const0_rtx
20601 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20602 return false;
20603 code = (code == GTU ? GEU : LTU);
20605 else
20607 std::swap (op0, op1);
20608 code = (code == GTU ? LTU : GEU);
20610 break;
20612 /* Convert a>=0 into (unsigned)a<0x80000000. */
20613 case LT:
20614 case GE:
20615 if (mode == DImode || op1 != const0_rtx)
20616 return false;
20617 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20618 code = (code == LT ? GEU : LTU);
20619 break;
20620 case LE:
20621 case GT:
20622 if (mode == DImode || op1 != constm1_rtx)
20623 return false;
20624 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20625 code = (code == LE ? GEU : LTU);
20626 break;
20628 default:
20629 return false;
20631 /* Swapping operands may cause constant to appear as first operand. */
20632 if (!nonimmediate_operand (op0, VOIDmode))
20634 if (!can_create_pseudo_p ())
20635 return false;
20636 op0 = force_reg (mode, op0);
20638 *pop = ix86_expand_compare (code, op0, op1);
20639 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20640 return true;
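/* Examples of the rewrites above (hypothetical operands): "a == 0"
   becomes the carry-setting "(unsigned) a < 1"; "a > 41" with a constant
   becomes "a >= 42" (GEU); and "a >= 0" becomes
   "(unsigned) a < 0x80000000".  The resulting LTU/GEU comparison can then
   be consumed by the sbb-style sequences in the caller below.  */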
20643 bool
20644 ix86_expand_int_movcc (rtx operands[])
20646 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20647 rtx_insn *compare_seq;
20648 rtx compare_op;
20649 machine_mode mode = GET_MODE (operands[0]);
20650 bool sign_bit_compare_p = false;
20651 rtx op0 = XEXP (operands[1], 0);
20652 rtx op1 = XEXP (operands[1], 1);
20654 if (GET_MODE (op0) == TImode
20655 || (GET_MODE (op0) == DImode
20656 && !TARGET_64BIT))
20657 return false;
20659 start_sequence ();
20660 compare_op = ix86_expand_compare (code, op0, op1);
20661 compare_seq = get_insns ();
20662 end_sequence ();
20664 compare_code = GET_CODE (compare_op);
20666 if ((op1 == const0_rtx && (code == GE || code == LT))
20667 || (op1 == constm1_rtx && (code == GT || code == LE)))
20668 sign_bit_compare_p = true;
20670 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20671 HImode insns, we'd be swallowed in word prefix ops. */
20673 if ((mode != HImode || TARGET_FAST_PREFIX)
20674 && (mode != (TARGET_64BIT ? TImode : DImode))
20675 && CONST_INT_P (operands[2])
20676 && CONST_INT_P (operands[3]))
20678 rtx out = operands[0];
20679 HOST_WIDE_INT ct = INTVAL (operands[2]);
20680 HOST_WIDE_INT cf = INTVAL (operands[3]);
20681 HOST_WIDE_INT diff;
20683 diff = ct - cf;
20684 /* Sign bit compares are better done using shifts than by using
20685 sbb. */
20686 if (sign_bit_compare_p
20687 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20689 /* Detect overlap between destination and compare sources. */
20690 rtx tmp = out;
20692 if (!sign_bit_compare_p)
20694 rtx flags;
20695 bool fpcmp = false;
20697 compare_code = GET_CODE (compare_op);
20699 flags = XEXP (compare_op, 0);
20701 if (GET_MODE (flags) == CCFPmode
20702 || GET_MODE (flags) == CCFPUmode)
20704 fpcmp = true;
20705 compare_code
20706 = ix86_fp_compare_code_to_integer (compare_code);
20709 /* To simplify rest of code, restrict to the GEU case. */
20710 if (compare_code == LTU)
20712 std::swap (ct, cf);
20713 compare_code = reverse_condition (compare_code);
20714 code = reverse_condition (code);
20716 else
20718 if (fpcmp)
20719 PUT_CODE (compare_op,
20720 reverse_condition_maybe_unordered
20721 (GET_CODE (compare_op)));
20722 else
20723 PUT_CODE (compare_op,
20724 reverse_condition (GET_CODE (compare_op)));
20726 diff = ct - cf;
20728 if (reg_overlap_mentioned_p (out, op0)
20729 || reg_overlap_mentioned_p (out, op1))
20730 tmp = gen_reg_rtx (mode);
20732 if (mode == DImode)
20733 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20734 else
20735 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20736 flags, compare_op));
20738 else
20740 if (code == GT || code == GE)
20741 code = reverse_condition (code);
20742 else
20744 std::swap (ct, cf);
20745 diff = ct - cf;
20747 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20750 if (diff == 1)
20753 * cmpl op0,op1
20754 * sbbl dest,dest
20755 * [addl dest, ct]
20757 * Size 5 - 8.
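/* For illustration: "sbb dest,dest" computes dest - dest - CF, i.e.
   0 or -1 depending on the carry flag, and since ct - cf == 1 the
   optional add of ct then yields either ct or ct - 1 == cf.  */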
20759 if (ct)
20760 tmp = expand_simple_binop (mode, PLUS,
20761 tmp, GEN_INT (ct),
20762 copy_rtx (tmp), 1, OPTAB_DIRECT);
20764 else if (cf == -1)
20767 * cmpl op0,op1
20768 * sbbl dest,dest
20769 * orl $ct, dest
20771 * Size 8.
20773 tmp = expand_simple_binop (mode, IOR,
20774 tmp, GEN_INT (ct),
20775 copy_rtx (tmp), 1, OPTAB_DIRECT);
20777 else if (diff == -1 && ct)
20780 * cmpl op0,op1
20781 * sbbl dest,dest
20782 * notl dest
20783 * [addl dest, cf]
20785 * Size 8 - 11.
20787 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20788 if (cf)
20789 tmp = expand_simple_binop (mode, PLUS,
20790 copy_rtx (tmp), GEN_INT (cf),
20791 copy_rtx (tmp), 1, OPTAB_DIRECT);
20793 else
20796 * cmpl op0,op1
20797 * sbbl dest,dest
20798 * [notl dest]
20799 * andl cf - ct, dest
20800 * [addl dest, ct]
20802 * Size 8 - 11.
20805 if (cf == 0)
20807 cf = ct;
20808 ct = 0;
20809 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20812 tmp = expand_simple_binop (mode, AND,
20813 copy_rtx (tmp),
20814 gen_int_mode (cf - ct, mode),
20815 copy_rtx (tmp), 1, OPTAB_DIRECT);
20816 if (ct)
20817 tmp = expand_simple_binop (mode, PLUS,
20818 copy_rtx (tmp), GEN_INT (ct),
20819 copy_rtx (tmp), 1, OPTAB_DIRECT);
20822 if (!rtx_equal_p (tmp, out))
20823 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20825 return true;
20828 if (diff < 0)
20830 machine_mode cmp_mode = GET_MODE (op0);
20831 enum rtx_code new_code;
20833 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20835 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20837 /* We may be reversing an unordered compare to a normal compare, which
20838 is not valid in general (we may convert a non-trapping condition
20839 to a trapping one); however, on i386 we currently emit all
20840 comparisons unordered. */
20841 new_code = reverse_condition_maybe_unordered (code);
20843 else
20844 new_code = ix86_reverse_condition (code, cmp_mode);
20845 if (new_code != UNKNOWN)
20847 std::swap (ct, cf);
20848 diff = -diff;
20849 code = new_code;
20853 compare_code = UNKNOWN;
20854 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20855 && CONST_INT_P (op1))
20857 if (op1 == const0_rtx
20858 && (code == LT || code == GE))
20859 compare_code = code;
20860 else if (op1 == constm1_rtx)
20862 if (code == LE)
20863 compare_code = LT;
20864 else if (code == GT)
20865 compare_code = GE;
20869 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20870 if (compare_code != UNKNOWN
20871 && GET_MODE (op0) == GET_MODE (out)
20872 && (cf == -1 || ct == -1))
20874 /* If lea code below could be used, only optimize
20875 if it results in a 2 insn sequence. */
20877 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20878 || diff == 3 || diff == 5 || diff == 9)
20879 || (compare_code == LT && ct == -1)
20880 || (compare_code == GE && cf == -1))
20883 * notl op1 (if necessary)
20884 * sarl $31, op1
20885 * orl cf, op1
20887 if (ct != -1)
20889 cf = ct;
20890 ct = -1;
20891 code = reverse_condition (code);
20894 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20896 out = expand_simple_binop (mode, IOR,
20897 out, GEN_INT (cf),
20898 out, 1, OPTAB_DIRECT);
20899 if (out != operands[0])
20900 emit_move_insn (operands[0], out);
20902 return true;
20907 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20908 || diff == 3 || diff == 5 || diff == 9)
20909 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20910 && (mode != DImode
20911 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20914 * xorl dest,dest
20915 * cmpl op1,op2
20916 * setcc dest
20917 * lea cf(dest*(ct-cf)),dest
20919 * Size 14.
20921 * This also catches the degenerate setcc-only case.
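/* For illustration: after setcc, dest is 0 or 1, so the lea computes
   cf + dest * (ct - cf), i.e. either cf or ct; the scale/index/base
   addressing limits diff to 1, 2, 3, 4, 5, 8 or 9.  */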
20924 rtx tmp;
20925 int nops;
20927 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20929 nops = 0;
20930 /* On x86_64 the lea instruction operates on Pmode, so we need
20931 to get the arithmetic done in the proper mode to match. */
20932 if (diff == 1)
20933 tmp = copy_rtx (out);
20934 else
20936 rtx out1;
20937 out1 = copy_rtx (out);
20938 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20939 nops++;
20940 if (diff & 1)
20942 tmp = gen_rtx_PLUS (mode, tmp, out1);
20943 nops++;
20946 if (cf != 0)
20948 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20949 nops++;
20951 if (!rtx_equal_p (tmp, out))
20953 if (nops == 1)
20954 out = force_operand (tmp, copy_rtx (out));
20955 else
20956 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
20958 if (!rtx_equal_p (out, operands[0]))
20959 emit_move_insn (operands[0], copy_rtx (out));
20961 return true;
20965 * General case: Jumpful:
20966 * xorl dest,dest cmpl op1, op2
20967 * cmpl op1, op2 movl ct, dest
20968 * setcc dest jcc 1f
20969 * decl dest movl cf, dest
20970 * andl (cf-ct),dest 1:
20971 * addl ct,dest
20973 * Size 20. Size 14.
20975 * This is reasonably steep, but branch mispredict costs are
20976 * high on modern cpus, so consider failing only if optimizing
20977 * for space.
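/* For illustration: xorl+setcc leaves dest as 0 or 1, the decrement
   turns that into -1 or 0, the and with (cf - ct) gives cf - ct or 0,
   and the final add of ct produces cf or ct without a branch.  */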
20980 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20981 && BRANCH_COST (optimize_insn_for_speed_p (),
20982 false) >= 2)
20984 if (cf == 0)
20986 machine_mode cmp_mode = GET_MODE (op0);
20987 enum rtx_code new_code;
20989 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20991 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20993 /* We may be reversing an unordered compare to a normal compare,
20994 which is not valid in general (we may convert a non-trapping
20995 condition to a trapping one); however, on i386 we currently
20996 emit all comparisons unordered. */
20997 new_code = reverse_condition_maybe_unordered (code);
20999 else
21001 new_code = ix86_reverse_condition (code, cmp_mode);
21002 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21003 compare_code = reverse_condition (compare_code);
21006 if (new_code != UNKNOWN)
21008 cf = ct;
21009 ct = 0;
21010 code = new_code;
21014 if (compare_code != UNKNOWN)
21016 /* notl op1 (if needed)
21017 sarl $31, op1
21018 andl (cf-ct), op1
21019 addl ct, op1
21021 For x < 0 (resp. x <= -1) there will be no notl,
21022 so if possible swap the constants to get rid of the
21023 complement.
21024 True/false will be -1/0 while code below (store flag
21025 followed by decrement) is 0/-1, so the constants need
21026 to be exchanged once more. */
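/* For illustration: "sarl $31, op1" replicates the sign bit, so op1
   becomes 0 or -1; the and with (cf - ct) and the add of ct then
   select between ct and cf just as in the setcc variant above.  */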
21028 if (compare_code == GE || !cf)
21030 code = reverse_condition (code);
21031 compare_code = LT;
21033 else
21034 std::swap (ct, cf);
21036 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21038 else
21040 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21042 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21043 constm1_rtx,
21044 copy_rtx (out), 1, OPTAB_DIRECT);
21047 out = expand_simple_binop (mode, AND, copy_rtx (out),
21048 gen_int_mode (cf - ct, mode),
21049 copy_rtx (out), 1, OPTAB_DIRECT);
21050 if (ct)
21051 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21052 copy_rtx (out), 1, OPTAB_DIRECT);
21053 if (!rtx_equal_p (out, operands[0]))
21054 emit_move_insn (operands[0], copy_rtx (out));
21056 return true;
21060 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21062 /* Try a few things more with specific constants and a variable. */
21064 optab op;
21065 rtx var, orig_out, out, tmp;
21067 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21068 return false;
21070 /* If one of the two operands is an interesting constant, load a
21071 constant with the above and mask it in with a logical operation. */
21073 if (CONST_INT_P (operands[2]))
21075 var = operands[3];
21076 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21077 operands[3] = constm1_rtx, op = and_optab;
21078 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21079 operands[3] = const0_rtx, op = ior_optab;
21080 else
21081 return false;
21083 else if (CONST_INT_P (operands[3]))
21085 var = operands[2];
21086 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21087 operands[2] = constm1_rtx, op = and_optab;
21088 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21089 operands[2] = const0_rtx, op = ior_optab;
21090 else
21091 return false;
21093 else
21094 return false;
21096 orig_out = operands[0];
21097 tmp = gen_reg_rtx (mode);
21098 operands[0] = tmp;
21100 /* Recurse to get the constant loaded. */
21101 if (ix86_expand_int_movcc (operands) == 0)
21102 return false;
21104 /* Mask in the interesting variable. */
21105 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21106 OPTAB_WIDEN);
21107 if (!rtx_equal_p (out, orig_out))
21108 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21110 return true;
21114 * For comparison with above,
21116 * movl cf,dest
21117 * movl ct,tmp
21118 * cmpl op1,op2
21119 * cmovcc tmp,dest
21121 * Size 15.
21124 if (! nonimmediate_operand (operands[2], mode))
21125 operands[2] = force_reg (mode, operands[2]);
21126 if (! nonimmediate_operand (operands[3], mode))
21127 operands[3] = force_reg (mode, operands[3]);
21129 if (! register_operand (operands[2], VOIDmode)
21130 && (mode == QImode
21131 || ! register_operand (operands[3], VOIDmode)))
21132 operands[2] = force_reg (mode, operands[2]);
21134 if (mode == QImode
21135 && ! register_operand (operands[3], VOIDmode))
21136 operands[3] = force_reg (mode, operands[3]);
21138 emit_insn (compare_seq);
21139 emit_insn (gen_rtx_SET (operands[0],
21140 gen_rtx_IF_THEN_ELSE (mode,
21141 compare_op, operands[2],
21142 operands[3])));
21143 return true;
21146 /* Swap, force into registers, or otherwise massage the two operands
21147 to an sse comparison with a mask result. Thus we differ a bit from
21148 ix86_prepare_fp_compare_args which expects to produce a flags result.
21150 The DEST operand exists to help determine whether to commute commutative
21151 operators. The POP0/POP1 operands are updated in place. The new
21152 comparison code is returned, or UNKNOWN if not implementable. */
21154 static enum rtx_code
21155 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21156 rtx *pop0, rtx *pop1)
21158 switch (code)
21160 case LTGT:
21161 case UNEQ:
21162 /* AVX supports all the needed comparisons. */
21163 if (TARGET_AVX)
21164 break;
21165 /* We have no LTGT as an operator. We could implement it with
21166 NE & ORDERED, but this requires an extra temporary. It's
21167 not clear that it's worth it. */
21168 return UNKNOWN;
21170 case LT:
21171 case LE:
21172 case UNGT:
21173 case UNGE:
21174 /* These are supported directly. */
21175 break;
21177 case EQ:
21178 case NE:
21179 case UNORDERED:
21180 case ORDERED:
21181 /* AVX has 3 operand comparisons, no need to swap anything. */
21182 if (TARGET_AVX)
21183 break;
21184 /* For commutative operators, try to canonicalize the destination
21185 operand to be first in the comparison - this helps reload to
21186 avoid extra moves. */
21187 if (!dest || !rtx_equal_p (dest, *pop1))
21188 break;
21189 /* FALLTHRU */
21191 case GE:
21192 case GT:
21193 case UNLE:
21194 case UNLT:
21195 /* These are not supported directly before AVX, and furthermore
21196 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21197 comparison operands to transform into something that is
21198 supported. */
21199 std::swap (*pop0, *pop1);
21200 code = swap_condition (code);
21201 break;
21203 default:
21204 gcc_unreachable ();
21207 return code;
21210 /* Detect conditional moves that exactly match min/max operational
21211 semantics. Note that this is IEEE safe, as long as we don't
21212 interchange the operands.
21214 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21215 and TRUE if the operation is successful and instructions are emitted. */
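/* For illustration: a conditional move such as x < y ? x : y maps to
   a minimum and x < y ? y : x to a maximum; the operand order is kept
   as-is because SSE min/max are not symmetric with respect to NaNs
   and signed zeros.  */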
21217 static bool
21218 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21219 rtx cmp_op1, rtx if_true, rtx if_false)
21221 machine_mode mode;
21222 bool is_min;
21223 rtx tmp;
21225 if (code == LT)
21227 else if (code == UNGE)
21228 std::swap (if_true, if_false);
21229 else
21230 return false;
21232 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21233 is_min = true;
21234 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21235 is_min = false;
21236 else
21237 return false;
21239 mode = GET_MODE (dest);
21241 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21242 but MODE may be a vector mode and thus not appropriate. */
21243 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21245 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21246 rtvec v;
21248 if_true = force_reg (mode, if_true);
21249 v = gen_rtvec (2, if_true, if_false);
21250 tmp = gen_rtx_UNSPEC (mode, v, u);
21252 else
21254 code = is_min ? SMIN : SMAX;
21255 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21258 emit_insn (gen_rtx_SET (dest, tmp));
21259 return true;
21262 /* Expand an sse vector comparison. Return the register with the result. */
21264 static rtx
21265 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21266 rtx op_true, rtx op_false)
21268 machine_mode mode = GET_MODE (dest);
21269 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21271 /* In the general case the result of the comparison can differ from the operands' type. */
21272 machine_mode cmp_mode;
21274 /* In AVX512F the result of comparison is an integer mask. */
21275 bool maskcmp = false;
21276 rtx x;
21278 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21280 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21281 gcc_assert (cmp_mode != BLKmode);
21283 maskcmp = true;
21285 else
21286 cmp_mode = cmp_ops_mode;
21289 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21290 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21291 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21293 if (optimize
21294 || reg_overlap_mentioned_p (dest, op_true)
21295 || reg_overlap_mentioned_p (dest, op_false))
21296 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21298 /* Compare patterns for int modes are unspec in AVX512F only. */
21299 if (maskcmp && (code == GT || code == EQ))
21301 rtx (*gen)(rtx, rtx, rtx);
21303 switch (cmp_ops_mode)
21305 case V64QImode:
21306 gcc_assert (TARGET_AVX512BW);
21307 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21308 break;
21309 case V32HImode:
21310 gcc_assert (TARGET_AVX512BW);
21311 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21312 break;
21313 case V16SImode:
21314 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21315 break;
21316 case V8DImode:
21317 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21318 break;
21319 default:
21320 gen = NULL;
21323 if (gen)
21325 emit_insn (gen (dest, cmp_op0, cmp_op1));
21326 return dest;
21329 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21331 if (cmp_mode != mode && !maskcmp)
21333 x = force_reg (cmp_ops_mode, x);
21334 convert_move (dest, x, false);
21336 else
21337 emit_insn (gen_rtx_SET (dest, x));
21339 return dest;
21342 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21343 operations. This is used for both scalar and vector conditional moves. */
21345 static void
21346 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21348 machine_mode mode = GET_MODE (dest);
21349 machine_mode cmpmode = GET_MODE (cmp);
21351 /* In AVX512F the result of comparison is an integer mask. */
21352 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21354 rtx t2, t3, x;
21356 if (vector_all_ones_operand (op_true, mode)
21357 && rtx_equal_p (op_false, CONST0_RTX (mode))
21358 && !maskcmp)
21360 emit_insn (gen_rtx_SET (dest, cmp));
21362 else if (op_false == CONST0_RTX (mode)
21363 && !maskcmp)
21365 op_true = force_reg (mode, op_true);
21366 x = gen_rtx_AND (mode, cmp, op_true);
21367 emit_insn (gen_rtx_SET (dest, x));
21369 else if (op_true == CONST0_RTX (mode)
21370 && !maskcmp)
21372 op_false = force_reg (mode, op_false);
21373 x = gen_rtx_NOT (mode, cmp);
21374 x = gen_rtx_AND (mode, x, op_false);
21375 emit_insn (gen_rtx_SET (dest, x));
21377 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21378 && !maskcmp)
21380 op_false = force_reg (mode, op_false);
21381 x = gen_rtx_IOR (mode, cmp, op_false);
21382 emit_insn (gen_rtx_SET (dest, x));
21384 else if (TARGET_XOP
21385 && !maskcmp)
21387 op_true = force_reg (mode, op_true);
21389 if (!nonimmediate_operand (op_false, mode))
21390 op_false = force_reg (mode, op_false);
21392 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21393 op_true,
21394 op_false)));
21396 else
21398 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21399 rtx d = dest;
21401 if (!nonimmediate_operand (op_true, mode))
21402 op_true = force_reg (mode, op_true);
21404 op_false = force_reg (mode, op_false);
21406 switch (mode)
21408 case V4SFmode:
21409 if (TARGET_SSE4_1)
21410 gen = gen_sse4_1_blendvps;
21411 break;
21412 case V2DFmode:
21413 if (TARGET_SSE4_1)
21414 gen = gen_sse4_1_blendvpd;
21415 break;
21416 case V16QImode:
21417 case V8HImode:
21418 case V4SImode:
21419 case V2DImode:
21420 if (TARGET_SSE4_1)
21422 gen = gen_sse4_1_pblendvb;
21423 if (mode != V16QImode)
21424 d = gen_reg_rtx (V16QImode);
21425 op_false = gen_lowpart (V16QImode, op_false);
21426 op_true = gen_lowpart (V16QImode, op_true);
21427 cmp = gen_lowpart (V16QImode, cmp);
21429 break;
21430 case V8SFmode:
21431 if (TARGET_AVX)
21432 gen = gen_avx_blendvps256;
21433 break;
21434 case V4DFmode:
21435 if (TARGET_AVX)
21436 gen = gen_avx_blendvpd256;
21437 break;
21438 case V32QImode:
21439 case V16HImode:
21440 case V8SImode:
21441 case V4DImode:
21442 if (TARGET_AVX2)
21444 gen = gen_avx2_pblendvb;
21445 if (mode != V32QImode)
21446 d = gen_reg_rtx (V32QImode);
21447 op_false = gen_lowpart (V32QImode, op_false);
21448 op_true = gen_lowpart (V32QImode, op_true);
21449 cmp = gen_lowpart (V32QImode, cmp);
21451 break;
21453 case V64QImode:
21454 gen = gen_avx512bw_blendmv64qi;
21455 break;
21456 case V32HImode:
21457 gen = gen_avx512bw_blendmv32hi;
21458 break;
21459 case V16SImode:
21460 gen = gen_avx512f_blendmv16si;
21461 break;
21462 case V8DImode:
21463 gen = gen_avx512f_blendmv8di;
21464 break;
21465 case V8DFmode:
21466 gen = gen_avx512f_blendmv8df;
21467 break;
21468 case V16SFmode:
21469 gen = gen_avx512f_blendmv16sf;
21470 break;
21472 default:
21473 break;
21476 if (gen != NULL)
21478 emit_insn (gen (d, op_false, op_true, cmp));
21479 if (d != dest)
21480 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21482 else
21484 op_true = force_reg (mode, op_true);
21486 t2 = gen_reg_rtx (mode);
21487 if (optimize)
21488 t3 = gen_reg_rtx (mode);
21489 else
21490 t3 = dest;
21492 x = gen_rtx_AND (mode, op_true, cmp);
21493 emit_insn (gen_rtx_SET (t2, x));
21495 x = gen_rtx_NOT (mode, cmp);
21496 x = gen_rtx_AND (mode, x, op_false);
21497 emit_insn (gen_rtx_SET (t3, x));
21499 x = gen_rtx_IOR (mode, t3, t2);
21500 emit_insn (gen_rtx_SET (dest, x));
21505 /* Expand a floating-point conditional move. Return true if successful. */
21507 bool
21508 ix86_expand_fp_movcc (rtx operands[])
21510 machine_mode mode = GET_MODE (operands[0]);
21511 enum rtx_code code = GET_CODE (operands[1]);
21512 rtx tmp, compare_op;
21513 rtx op0 = XEXP (operands[1], 0);
21514 rtx op1 = XEXP (operands[1], 1);
21516 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21518 machine_mode cmode;
21520 /* Since we've no cmove for sse registers, don't force bad register
21521 allocation just to gain access to it. Deny movcc when the
21522 comparison mode doesn't match the move mode. */
21523 cmode = GET_MODE (op0);
21524 if (cmode == VOIDmode)
21525 cmode = GET_MODE (op1);
21526 if (cmode != mode)
21527 return false;
21529 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21530 if (code == UNKNOWN)
21531 return false;
21533 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21534 operands[2], operands[3]))
21535 return true;
21537 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21538 operands[2], operands[3]);
21539 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21540 return true;
21543 if (GET_MODE (op0) == TImode
21544 || (GET_MODE (op0) == DImode
21545 && !TARGET_64BIT))
21546 return false;
21548 /* The floating point conditional move instructions don't directly
21549 support conditions resulting from a signed integer comparison. */
21551 compare_op = ix86_expand_compare (code, op0, op1);
21552 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21554 tmp = gen_reg_rtx (QImode);
21555 ix86_expand_setcc (tmp, code, op0, op1);
21557 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21560 emit_insn (gen_rtx_SET (operands[0],
21561 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21562 operands[2], operands[3])));
21564 return true;
21567 /* Expand a floating-point vector conditional move; a vcond operation
21568 rather than a movcc operation. */
21570 bool
21571 ix86_expand_fp_vcond (rtx operands[])
21573 enum rtx_code code = GET_CODE (operands[3]);
21574 rtx cmp;
21576 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21577 &operands[4], &operands[5]);
21578 if (code == UNKNOWN)
21580 rtx temp;
21581 switch (GET_CODE (operands[3]))
21583 case LTGT:
21584 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21585 operands[5], operands[0], operands[0]);
21586 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21587 operands[5], operands[1], operands[2]);
21588 code = AND;
21589 break;
21590 case UNEQ:
21591 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21592 operands[5], operands[0], operands[0]);
21593 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21594 operands[5], operands[1], operands[2]);
21595 code = IOR;
21596 break;
21597 default:
21598 gcc_unreachable ();
21600 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21601 OPTAB_DIRECT);
21602 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21603 return true;
21606 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21607 operands[5], operands[1], operands[2]))
21608 return true;
21610 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21611 operands[1], operands[2]);
21612 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21613 return true;
21616 /* Expand a signed/unsigned integral vector conditional move. */
21618 bool
21619 ix86_expand_int_vcond (rtx operands[])
21621 machine_mode data_mode = GET_MODE (operands[0]);
21622 machine_mode mode = GET_MODE (operands[4]);
21623 enum rtx_code code = GET_CODE (operands[3]);
21624 bool negate = false;
21625 rtx x, cop0, cop1;
21627 cop0 = operands[4];
21628 cop1 = operands[5];
21630 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21631 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
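/* For illustration: an arithmetic right shift by the element width
   minus one replicates the sign bit, giving 0 or -1 per element,
   while a logical right shift gives 0 or 1, so no compare is needed.  */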
21632 if ((code == LT || code == GE)
21633 && data_mode == mode
21634 && cop1 == CONST0_RTX (mode)
21635 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21636 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21637 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21638 && (GET_MODE_SIZE (data_mode) == 16
21639 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21641 rtx negop = operands[2 - (code == LT)];
21642 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21643 if (negop == CONST1_RTX (data_mode))
21645 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21646 operands[0], 1, OPTAB_DIRECT);
21647 if (res != operands[0])
21648 emit_move_insn (operands[0], res);
21649 return true;
21651 else if (GET_MODE_INNER (data_mode) != DImode
21652 && vector_all_ones_operand (negop, data_mode))
21654 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21655 operands[0], 0, OPTAB_DIRECT);
21656 if (res != operands[0])
21657 emit_move_insn (operands[0], res);
21658 return true;
21662 if (!nonimmediate_operand (cop1, mode))
21663 cop1 = force_reg (mode, cop1);
21664 if (!general_operand (operands[1], data_mode))
21665 operands[1] = force_reg (data_mode, operands[1]);
21666 if (!general_operand (operands[2], data_mode))
21667 operands[2] = force_reg (data_mode, operands[2]);
21669 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21670 if (TARGET_XOP
21671 && (mode == V16QImode || mode == V8HImode
21672 || mode == V4SImode || mode == V2DImode))
21674 else
21676 /* Canonicalize the comparison to EQ, GT, GTU. */
21677 switch (code)
21679 case EQ:
21680 case GT:
21681 case GTU:
21682 break;
21684 case NE:
21685 case LE:
21686 case LEU:
21687 code = reverse_condition (code);
21688 negate = true;
21689 break;
21691 case GE:
21692 case GEU:
21693 code = reverse_condition (code);
21694 negate = true;
21695 /* FALLTHRU */
21697 case LT:
21698 case LTU:
21699 std::swap (cop0, cop1);
21700 code = swap_condition (code);
21701 break;
21703 default:
21704 gcc_unreachable ();
21707 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21708 if (mode == V2DImode)
21710 switch (code)
21712 case EQ:
21713 /* SSE4.1 supports EQ. */
21714 if (!TARGET_SSE4_1)
21715 return false;
21716 break;
21718 case GT:
21719 case GTU:
21720 /* SSE4.2 supports GT/GTU. */
21721 if (!TARGET_SSE4_2)
21722 return false;
21723 break;
21725 default:
21726 gcc_unreachable ();
21730 /* Unsigned parallel compare is not supported by the hardware.
21731 Play some tricks to turn this into a signed comparison
21732 against 0. */
21733 if (code == GTU)
21735 cop0 = force_reg (mode, cop0);
21737 switch (mode)
21739 case V16SImode:
21740 case V8DImode:
21741 case V8SImode:
21742 case V4DImode:
21743 case V4SImode:
21744 case V2DImode:
21746 rtx t1, t2, mask;
21747 rtx (*gen_sub3) (rtx, rtx, rtx);
21749 switch (mode)
21751 case V16SImode: gen_sub3 = gen_subv16si3; break;
21752 case V8DImode: gen_sub3 = gen_subv8di3; break;
21753 case V8SImode: gen_sub3 = gen_subv8si3; break;
21754 case V4DImode: gen_sub3 = gen_subv4di3; break;
21755 case V4SImode: gen_sub3 = gen_subv4si3; break;
21756 case V2DImode: gen_sub3 = gen_subv2di3; break;
21757 default:
21758 gcc_unreachable ();
21760 /* Subtract (-(INT MAX) - 1) from both operands to make
21761 them signed. */
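/* For illustration: for 32-bit elements, subtracting 0x80000000 in
   wrap-around arithmetic just flips the sign bit, and flipping the
   sign bit of both operands turns unsigned order into signed order.  */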
21762 mask = ix86_build_signbit_mask (mode, true, false);
21763 t1 = gen_reg_rtx (mode);
21764 emit_insn (gen_sub3 (t1, cop0, mask));
21766 t2 = gen_reg_rtx (mode);
21767 emit_insn (gen_sub3 (t2, cop1, mask));
21769 cop0 = t1;
21770 cop1 = t2;
21771 code = GT;
21773 break;
21775 case V64QImode:
21776 case V32HImode:
21777 case V32QImode:
21778 case V16HImode:
21779 case V16QImode:
21780 case V8HImode:
21781 /* Perform a parallel unsigned saturating subtraction. */
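/* For illustration: an unsigned saturating subtraction a - b is zero
   exactly when a <= b, so comparing the result against zero (and
   flipping NEGATE) recovers the unsigned greater-than test.  */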
21782 x = gen_reg_rtx (mode);
21783 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21785 cop0 = x;
21786 cop1 = CONST0_RTX (mode);
21787 code = EQ;
21788 negate = !negate;
21789 break;
21791 default:
21792 gcc_unreachable ();
21797 /* Allow the comparison to be done in one mode, but the movcc to
21798 happen in another mode. */
21799 if (data_mode == mode)
21801 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21802 operands[1+negate], operands[2-negate]);
21804 else
21806 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21807 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21808 operands[1+negate], operands[2-negate]);
21809 if (GET_MODE (x) == mode)
21810 x = gen_lowpart (data_mode, x);
21813 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21814 operands[2-negate]);
21815 return true;
21818 /* AVX512F does support 64-byte integer vector operations,
21819 thus the longest vector we are faced with is V64QImode. */
21820 #define MAX_VECT_LEN 64
21822 struct expand_vec_perm_d
21824 rtx target, op0, op1;
21825 unsigned char perm[MAX_VECT_LEN];
21826 machine_mode vmode;
21827 unsigned char nelt;
21828 bool one_operand_p;
21829 bool testing_p;
21832 static bool
21833 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21834 struct expand_vec_perm_d *d)
21836 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21837 expanders, so args are either in d, or in op0, op1 etc. */
21838 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21839 machine_mode maskmode = mode;
21840 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21842 switch (mode)
21844 case V8HImode:
21845 if (TARGET_AVX512VL && TARGET_AVX512BW)
21846 gen = gen_avx512vl_vpermi2varv8hi3;
21847 break;
21848 case V16HImode:
21849 if (TARGET_AVX512VL && TARGET_AVX512BW)
21850 gen = gen_avx512vl_vpermi2varv16hi3;
21851 break;
21852 case V64QImode:
21853 if (TARGET_AVX512VBMI)
21854 gen = gen_avx512bw_vpermi2varv64qi3;
21855 break;
21856 case V32HImode:
21857 if (TARGET_AVX512BW)
21858 gen = gen_avx512bw_vpermi2varv32hi3;
21859 break;
21860 case V4SImode:
21861 if (TARGET_AVX512VL)
21862 gen = gen_avx512vl_vpermi2varv4si3;
21863 break;
21864 case V8SImode:
21865 if (TARGET_AVX512VL)
21866 gen = gen_avx512vl_vpermi2varv8si3;
21867 break;
21868 case V16SImode:
21869 if (TARGET_AVX512F)
21870 gen = gen_avx512f_vpermi2varv16si3;
21871 break;
21872 case V4SFmode:
21873 if (TARGET_AVX512VL)
21875 gen = gen_avx512vl_vpermi2varv4sf3;
21876 maskmode = V4SImode;
21878 break;
21879 case V8SFmode:
21880 if (TARGET_AVX512VL)
21882 gen = gen_avx512vl_vpermi2varv8sf3;
21883 maskmode = V8SImode;
21885 break;
21886 case V16SFmode:
21887 if (TARGET_AVX512F)
21889 gen = gen_avx512f_vpermi2varv16sf3;
21890 maskmode = V16SImode;
21892 break;
21893 case V2DImode:
21894 if (TARGET_AVX512VL)
21895 gen = gen_avx512vl_vpermi2varv2di3;
21896 break;
21897 case V4DImode:
21898 if (TARGET_AVX512VL)
21899 gen = gen_avx512vl_vpermi2varv4di3;
21900 break;
21901 case V8DImode:
21902 if (TARGET_AVX512F)
21903 gen = gen_avx512f_vpermi2varv8di3;
21904 break;
21905 case V2DFmode:
21906 if (TARGET_AVX512VL)
21908 gen = gen_avx512vl_vpermi2varv2df3;
21909 maskmode = V2DImode;
21911 break;
21912 case V4DFmode:
21913 if (TARGET_AVX512VL)
21915 gen = gen_avx512vl_vpermi2varv4df3;
21916 maskmode = V4DImode;
21918 break;
21919 case V8DFmode:
21920 if (TARGET_AVX512F)
21922 gen = gen_avx512f_vpermi2varv8df3;
21923 maskmode = V8DImode;
21925 break;
21926 default:
21927 break;
21930 if (gen == NULL)
21931 return false;
21933 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21934 expanders, so args are either in d, or in op0, op1 etc. */
21935 if (d)
21937 rtx vec[64];
21938 target = d->target;
21939 op0 = d->op0;
21940 op1 = d->op1;
21941 for (int i = 0; i < d->nelt; ++i)
21942 vec[i] = GEN_INT (d->perm[i]);
21943 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21946 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21947 return true;
21950 /* Expand a variable vector permutation. */
21952 void
21953 ix86_expand_vec_perm (rtx operands[])
21955 rtx target = operands[0];
21956 rtx op0 = operands[1];
21957 rtx op1 = operands[2];
21958 rtx mask = operands[3];
21959 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21960 machine_mode mode = GET_MODE (op0);
21961 machine_mode maskmode = GET_MODE (mask);
21962 int w, e, i;
21963 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21965 /* Number of elements in the vector. */
21966 w = GET_MODE_NUNITS (mode);
21967 e = GET_MODE_UNIT_SIZE (mode);
21968 gcc_assert (w <= 64);
21970 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21971 return;
21973 if (TARGET_AVX2)
21975 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21977 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21978 a constant shuffle operand. With a tiny bit of effort we can
21979 use VPERMD instead. A re-interpretation stall for V4DFmode is
21980 unfortunate but there's no avoiding it.
21981 Similarly for V16HImode we don't have instructions for variable
21982 shuffling, while for V32QImode we can use, after preparing suitable
21983 masks, vpshufb; vpshufb; vpermq; vpor. */
21985 if (mode == V16HImode)
21987 maskmode = mode = V32QImode;
21988 w = 32;
21989 e = 1;
21991 else
21993 maskmode = mode = V8SImode;
21994 w = 8;
21995 e = 4;
21997 t1 = gen_reg_rtx (maskmode);
21999 /* Replicate the low bits of the V4DImode mask into V8SImode:
22000 mask = { A B C D }
22001 t1 = { A A B B C C D D }. */
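/* For illustration: a 64-bit shuffle index i expands to the 32-bit
   index pair { 2*i, 2*i + 1 }, which is what the doubling and the
   +1 on the odd positions below construct for VPERMD.  */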
22002 for (i = 0; i < w / 2; ++i)
22003 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22004 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22005 vt = force_reg (maskmode, vt);
22006 mask = gen_lowpart (maskmode, mask);
22007 if (maskmode == V8SImode)
22008 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22009 else
22010 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22012 /* Multiply the shuffle indices by two. */
22013 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22014 OPTAB_DIRECT);
22016 /* Add one to the odd shuffle indices:
22017 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22018 for (i = 0; i < w / 2; ++i)
22020 vec[i * 2] = const0_rtx;
22021 vec[i * 2 + 1] = const1_rtx;
22023 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22024 vt = validize_mem (force_const_mem (maskmode, vt));
22025 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22026 OPTAB_DIRECT);
22028 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22029 operands[3] = mask = t1;
22030 target = gen_reg_rtx (mode);
22031 op0 = gen_lowpart (mode, op0);
22032 op1 = gen_lowpart (mode, op1);
22035 switch (mode)
22037 case V8SImode:
22038 /* The VPERMD and VPERMPS instructions already properly ignore
22039 the high bits of the shuffle elements. No need for us to
22040 perform an AND ourselves. */
22041 if (one_operand_shuffle)
22043 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22044 if (target != operands[0])
22045 emit_move_insn (operands[0],
22046 gen_lowpart (GET_MODE (operands[0]), target));
22048 else
22050 t1 = gen_reg_rtx (V8SImode);
22051 t2 = gen_reg_rtx (V8SImode);
22052 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22053 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22054 goto merge_two;
22056 return;
22058 case V8SFmode:
22059 mask = gen_lowpart (V8SImode, mask);
22060 if (one_operand_shuffle)
22061 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22062 else
22064 t1 = gen_reg_rtx (V8SFmode);
22065 t2 = gen_reg_rtx (V8SFmode);
22066 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22067 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22068 goto merge_two;
22070 return;
22072 case V4SImode:
22073 /* By combining the two 128-bit input vectors into one 256-bit
22074 input vector, we can use VPERMD and VPERMPS for the full
22075 two-operand shuffle. */
22076 t1 = gen_reg_rtx (V8SImode);
22077 t2 = gen_reg_rtx (V8SImode);
22078 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22079 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22080 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22081 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22082 return;
22084 case V4SFmode:
22085 t1 = gen_reg_rtx (V8SFmode);
22086 t2 = gen_reg_rtx (V8SImode);
22087 mask = gen_lowpart (V4SImode, mask);
22088 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22089 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22090 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22091 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22092 return;
22094 case V32QImode:
22095 t1 = gen_reg_rtx (V32QImode);
22096 t2 = gen_reg_rtx (V32QImode);
22097 t3 = gen_reg_rtx (V32QImode);
22098 vt2 = GEN_INT (-128);
22099 for (i = 0; i < 32; i++)
22100 vec[i] = vt2;
22101 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22102 vt = force_reg (V32QImode, vt);
22103 for (i = 0; i < 32; i++)
22104 vec[i] = i < 16 ? vt2 : const0_rtx;
22105 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22106 vt2 = force_reg (V32QImode, vt2);
22107 /* From mask create two adjusted masks, which contain the same
22108 bits as mask in the low 7 bits of each vector element.
22109 The first mask will have the most significant bit clear
22110 if it requests an element from the same 128-bit lane
22111 and the MSB set if it requests an element from the other 128-bit lane.
22112 The second mask will have the opposite values of the MSB,
22113 and additionally will have its 128-bit lanes swapped.
22114 E.g. the { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22115 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22116 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22117 stands for the other 12 bytes. */
22118 /* The bit that tells whether an element is from the same lane or the
22119 other lane is bit 4, so shift it up by 3 to the MSB position. */
22120 t5 = gen_reg_rtx (V4DImode);
22121 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22122 GEN_INT (3)));
22123 /* Clear MSB bits from the mask just in case it had them set. */
22124 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22125 /* After this t1 will have MSB set for elements from other lane. */
22126 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22127 /* Clear bits other than MSB. */
22128 emit_insn (gen_andv32qi3 (t1, t1, vt));
22129 /* Or in the lower bits from mask into t3. */
22130 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22131 /* And invert MSB bits in t1, so MSB is set for elements from the same
22132 lane. */
22133 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22134 /* Swap 128-bit lanes in t3. */
22135 t6 = gen_reg_rtx (V4DImode);
22136 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22137 const2_rtx, GEN_INT (3),
22138 const0_rtx, const1_rtx));
22139 /* And or in the lower bits from mask into t1. */
22140 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22141 if (one_operand_shuffle)
22143 /* Each of these shuffles will put 0s in places where an
22144 element from the other 128-bit lane is needed; otherwise
22145 it will shuffle in the requested value. */
22146 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22147 gen_lowpart (V32QImode, t6)));
22148 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22149 /* For t3 the 128-bit lanes are swapped again. */
22150 t7 = gen_reg_rtx (V4DImode);
22151 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22152 const2_rtx, GEN_INT (3),
22153 const0_rtx, const1_rtx));
22154 /* And ORing both together produces the result. */
22155 emit_insn (gen_iorv32qi3 (target, t1,
22156 gen_lowpart (V32QImode, t7)));
22157 if (target != operands[0])
22158 emit_move_insn (operands[0],
22159 gen_lowpart (GET_MODE (operands[0]), target));
22160 return;
22163 t4 = gen_reg_rtx (V32QImode);
22164 /* Similar to the one_operand_shuffle code above,
22165 just repeated twice, once for each operand. The merge_two:
22166 code will merge the two results together. */
22167 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22168 gen_lowpart (V32QImode, t6)));
22169 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22170 gen_lowpart (V32QImode, t6)));
22171 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22172 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22173 t7 = gen_reg_rtx (V4DImode);
22174 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22175 const2_rtx, GEN_INT (3),
22176 const0_rtx, const1_rtx));
22177 t8 = gen_reg_rtx (V4DImode);
22178 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22179 const2_rtx, GEN_INT (3),
22180 const0_rtx, const1_rtx));
22181 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22182 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22183 t1 = t4;
22184 t2 = t3;
22185 goto merge_two;
22187 default:
22188 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22189 break;
22193 if (TARGET_XOP)
22195 /* The XOP VPPERM insn supports three inputs. By ignoring the
22196 one_operand_shuffle special case, we avoid creating another
22197 set of constant vectors in memory. */
22198 one_operand_shuffle = false;
22200 /* mask = mask & {2*w-1, ...} */
22201 vt = GEN_INT (2*w - 1);
22203 else
22205 /* mask = mask & {w-1, ...} */
22206 vt = GEN_INT (w - 1);
22209 for (i = 0; i < w; i++)
22210 vec[i] = vt;
22211 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22212 mask = expand_simple_binop (maskmode, AND, mask, vt,
22213 NULL_RTX, 0, OPTAB_DIRECT);
22215 /* For non-QImode operations, convert the word permutation control
22216 into a byte permutation control. */
22217 if (mode != V16QImode)
22219 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22220 GEN_INT (exact_log2 (e)),
22221 NULL_RTX, 0, OPTAB_DIRECT);
22223 /* Convert mask to vector of chars. */
22224 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22226 /* Replicate each of the input bytes into byte positions:
22227 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22228 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22229 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22230 for (i = 0; i < 16; ++i)
22231 vec[i] = GEN_INT (i/e * e);
22232 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22233 vt = validize_mem (force_const_mem (V16QImode, vt));
22234 if (TARGET_XOP)
22235 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22236 else
22237 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22239 /* Convert it into the byte positions by doing
22240 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
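/* For illustration: with V4SI (e == 4) a word index of 2 is first
   scaled to byte index 8, replicated into { 8, 8, 8, 8 } for that
   element, and then { 0, 1, 2, 3 } is added to give bytes 8..11.  */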
22241 for (i = 0; i < 16; ++i)
22242 vec[i] = GEN_INT (i % e);
22243 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22244 vt = validize_mem (force_const_mem (V16QImode, vt));
22245 emit_insn (gen_addv16qi3 (mask, mask, vt));
22248 /* The actual shuffle operations all operate on V16QImode. */
22249 op0 = gen_lowpart (V16QImode, op0);
22250 op1 = gen_lowpart (V16QImode, op1);
22252 if (TARGET_XOP)
22254 if (GET_MODE (target) != V16QImode)
22255 target = gen_reg_rtx (V16QImode);
22256 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22257 if (target != operands[0])
22258 emit_move_insn (operands[0],
22259 gen_lowpart (GET_MODE (operands[0]), target));
22261 else if (one_operand_shuffle)
22263 if (GET_MODE (target) != V16QImode)
22264 target = gen_reg_rtx (V16QImode);
22265 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22266 if (target != operands[0])
22267 emit_move_insn (operands[0],
22268 gen_lowpart (GET_MODE (operands[0]), target));
22270 else
22272 rtx xops[6];
22273 bool ok;
22275 /* Shuffle the two input vectors independently. */
22276 t1 = gen_reg_rtx (V16QImode);
22277 t2 = gen_reg_rtx (V16QImode);
22278 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22279 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22281 merge_two:
22282 /* Then merge them together. The key is whether any given control
22283 element contained a bit set that indicates the second word. */
22284 mask = operands[3];
22285 vt = GEN_INT (w);
22286 if (maskmode == V2DImode && !TARGET_SSE4_1)
22288 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22289 more shuffle to convert the V2DI input mask into a V4SI
22290 input mask. At which point the masking done by expand_int_vcond
22291 will work as desired. */
22292 rtx t3 = gen_reg_rtx (V4SImode);
22293 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22294 const0_rtx, const0_rtx,
22295 const2_rtx, const2_rtx));
22296 mask = t3;
22297 maskmode = V4SImode;
22298 e = w = 4;
22301 for (i = 0; i < w; i++)
22302 vec[i] = vt;
22303 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22304 vt = force_reg (maskmode, vt);
22305 mask = expand_simple_binop (maskmode, AND, mask, vt,
22306 NULL_RTX, 0, OPTAB_DIRECT);
22308 if (GET_MODE (target) != mode)
22309 target = gen_reg_rtx (mode);
22310 xops[0] = target;
22311 xops[1] = gen_lowpart (mode, t2);
22312 xops[2] = gen_lowpart (mode, t1);
22313 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22314 xops[4] = mask;
22315 xops[5] = vt;
22316 ok = ix86_expand_int_vcond (xops);
22317 gcc_assert (ok);
22318 if (target != operands[0])
22319 emit_move_insn (operands[0],
22320 gen_lowpart (GET_MODE (operands[0]), target));
22324 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22325 true if we should do zero extension, else sign extension. HIGH_P is
22326 true if we want the N/2 high elements, else the low elements. */
22328 void
22329 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22331 machine_mode imode = GET_MODE (src);
22332 rtx tmp;
22334 if (TARGET_SSE4_1)
22336 rtx (*unpack)(rtx, rtx);
22337 rtx (*extract)(rtx, rtx) = NULL;
22338 machine_mode halfmode = BLKmode;
22340 switch (imode)
22342 case V64QImode:
22343 if (unsigned_p)
22344 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22345 else
22346 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22347 halfmode = V32QImode;
22348 extract
22349 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22350 break;
22351 case V32QImode:
22352 if (unsigned_p)
22353 unpack = gen_avx2_zero_extendv16qiv16hi2;
22354 else
22355 unpack = gen_avx2_sign_extendv16qiv16hi2;
22356 halfmode = V16QImode;
22357 extract
22358 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22359 break;
22360 case V32HImode:
22361 if (unsigned_p)
22362 unpack = gen_avx512f_zero_extendv16hiv16si2;
22363 else
22364 unpack = gen_avx512f_sign_extendv16hiv16si2;
22365 halfmode = V16HImode;
22366 extract
22367 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22368 break;
22369 case V16HImode:
22370 if (unsigned_p)
22371 unpack = gen_avx2_zero_extendv8hiv8si2;
22372 else
22373 unpack = gen_avx2_sign_extendv8hiv8si2;
22374 halfmode = V8HImode;
22375 extract
22376 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22377 break;
22378 case V16SImode:
22379 if (unsigned_p)
22380 unpack = gen_avx512f_zero_extendv8siv8di2;
22381 else
22382 unpack = gen_avx512f_sign_extendv8siv8di2;
22383 halfmode = V8SImode;
22384 extract
22385 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22386 break;
22387 case V8SImode:
22388 if (unsigned_p)
22389 unpack = gen_avx2_zero_extendv4siv4di2;
22390 else
22391 unpack = gen_avx2_sign_extendv4siv4di2;
22392 halfmode = V4SImode;
22393 extract
22394 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22395 break;
22396 case V16QImode:
22397 if (unsigned_p)
22398 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22399 else
22400 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22401 break;
22402 case V8HImode:
22403 if (unsigned_p)
22404 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22405 else
22406 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22407 break;
22408 case V4SImode:
22409 if (unsigned_p)
22410 unpack = gen_sse4_1_zero_extendv2siv2di2;
22411 else
22412 unpack = gen_sse4_1_sign_extendv2siv2di2;
22413 break;
22414 default:
22415 gcc_unreachable ();
22418 if (GET_MODE_SIZE (imode) >= 32)
22420 tmp = gen_reg_rtx (halfmode);
22421 emit_insn (extract (tmp, src));
22423 else if (high_p)
22425 /* Shift higher 8 bytes to lower 8 bytes. */
22426 tmp = gen_reg_rtx (V1TImode);
22427 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22428 GEN_INT (64)));
22429 tmp = gen_lowpart (imode, tmp);
22431 else
22432 tmp = src;
22434 emit_insn (unpack (dest, tmp));
22436 else
22438 rtx (*unpack)(rtx, rtx, rtx);
22440 switch (imode)
22442 case V16QImode:
22443 if (high_p)
22444 unpack = gen_vec_interleave_highv16qi;
22445 else
22446 unpack = gen_vec_interleave_lowv16qi;
22447 break;
22448 case V8HImode:
22449 if (high_p)
22450 unpack = gen_vec_interleave_highv8hi;
22451 else
22452 unpack = gen_vec_interleave_lowv8hi;
22453 break;
22454 case V4SImode:
22455 if (high_p)
22456 unpack = gen_vec_interleave_highv4si;
22457 else
22458 unpack = gen_vec_interleave_lowv4si;
22459 break;
22460 default:
22461 gcc_unreachable ();
22464 if (unsigned_p)
22465 tmp = force_reg (imode, CONST0_RTX (imode));
22466 else
22467 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22468 src, pc_rtx, pc_rtx);
22470 rtx tmp2 = gen_reg_rtx (imode);
22471 emit_insn (unpack (tmp2, src, tmp));
22472 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22476 /* Expand conditional increment or decrement using adc/sbb instructions.
22477 The default case using setcc followed by the conditional move can be
22478 done by generic code. */
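/* For illustration: an expression like x + (a < b) can be emitted as
   a compare followed by an add-with-carry of zero, since the unsigned
   "below" result of the compare is left in the carry flag.  */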
22479 bool
22480 ix86_expand_int_addcc (rtx operands[])
22482 enum rtx_code code = GET_CODE (operands[1]);
22483 rtx flags;
22484 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22485 rtx compare_op;
22486 rtx val = const0_rtx;
22487 bool fpcmp = false;
22488 machine_mode mode;
22489 rtx op0 = XEXP (operands[1], 0);
22490 rtx op1 = XEXP (operands[1], 1);
22492 if (operands[3] != const1_rtx
22493 && operands[3] != constm1_rtx)
22494 return false;
22495 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22496 return false;
22497 code = GET_CODE (compare_op);
22499 flags = XEXP (compare_op, 0);
22501 if (GET_MODE (flags) == CCFPmode
22502 || GET_MODE (flags) == CCFPUmode)
22504 fpcmp = true;
22505 code = ix86_fp_compare_code_to_integer (code);
22508 if (code != LTU)
22510 val = constm1_rtx;
22511 if (fpcmp)
22512 PUT_CODE (compare_op,
22513 reverse_condition_maybe_unordered
22514 (GET_CODE (compare_op)));
22515 else
22516 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22519 mode = GET_MODE (operands[0]);
22521 /* Construct either adc or sbb insn. */
22522 if ((code == LTU) == (operands[3] == constm1_rtx))
22524 switch (mode)
22526 case QImode:
22527 insn = gen_subqi3_carry;
22528 break;
22529 case HImode:
22530 insn = gen_subhi3_carry;
22531 break;
22532 case SImode:
22533 insn = gen_subsi3_carry;
22534 break;
22535 case DImode:
22536 insn = gen_subdi3_carry;
22537 break;
22538 default:
22539 gcc_unreachable ();
22542 else
22544 switch (mode)
22546 case QImode:
22547 insn = gen_addqi3_carry;
22548 break;
22549 case HImode:
22550 insn = gen_addhi3_carry;
22551 break;
22552 case SImode:
22553 insn = gen_addsi3_carry;
22554 break;
22555 case DImode:
22556 insn = gen_adddi3_carry;
22557 break;
22558 default:
22559 gcc_unreachable ();
22562 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22564 return true;
22568 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22569 but works for floating point parameters and non-offsettable memories.
22570 For pushes, it returns just stack offsets; the values will be saved
22571 in the right order. At most four parts are generated. */
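/* For illustration: on a 32-bit target a DImode value splits into two
   SImode parts and an XFmode value into three, while on a 64-bit
   target a TFmode value splits into two DImode parts.  */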
22573 static int
22574 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22576 int size;
22578 if (!TARGET_64BIT)
22579 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22580 else
22581 size = (GET_MODE_SIZE (mode) + 4) / 8;
22583 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22584 gcc_assert (size >= 2 && size <= 4);
22586 /* Optimize constant pool references to immediates. This is used by fp
22587 moves that force all constants to memory to allow combining. */
22588 if (MEM_P (operand) && MEM_READONLY_P (operand))
22590 rtx tmp = maybe_get_pool_constant (operand);
22591 if (tmp)
22592 operand = tmp;
22595 if (MEM_P (operand) && !offsettable_memref_p (operand))
22597 /* The only non-offsettable memories we handle are pushes. */
22598 int ok = push_operand (operand, VOIDmode);
22600 gcc_assert (ok);
22602 operand = copy_rtx (operand);
22603 PUT_MODE (operand, word_mode);
22604 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22605 return size;
22608 if (GET_CODE (operand) == CONST_VECTOR)
22610 machine_mode imode = int_mode_for_mode (mode);
22611 /* Caution: if we looked through a constant pool memory above,
22612 the operand may actually have a different mode now. That's
22613 ok, since we want to pun this all the way back to an integer. */
22614 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22615 gcc_assert (operand != NULL);
22616 mode = imode;
22619 if (!TARGET_64BIT)
22621 if (mode == DImode)
22622 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22623 else
22625 int i;
22627 if (REG_P (operand))
22629 gcc_assert (reload_completed);
22630 for (i = 0; i < size; i++)
22631 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22633 else if (offsettable_memref_p (operand))
22635 operand = adjust_address (operand, SImode, 0);
22636 parts[0] = operand;
22637 for (i = 1; i < size; i++)
22638 parts[i] = adjust_address (operand, SImode, 4 * i);
22640 else if (CONST_DOUBLE_P (operand))
22642 REAL_VALUE_TYPE r;
22643 long l[4];
22645 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22646 switch (mode)
22648 case TFmode:
22649 real_to_target (l, &r, mode);
22650 parts[3] = gen_int_mode (l[3], SImode);
22651 parts[2] = gen_int_mode (l[2], SImode);
22652 break;
22653 case XFmode:
22654 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22655 long double may not be 80-bit. */
22656 real_to_target (l, &r, mode);
22657 parts[2] = gen_int_mode (l[2], SImode);
22658 break;
22659 case DFmode:
22660 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22661 break;
22662 default:
22663 gcc_unreachable ();
22665 parts[1] = gen_int_mode (l[1], SImode);
22666 parts[0] = gen_int_mode (l[0], SImode);
22668 else
22669 gcc_unreachable ();
22672 else
22674 if (mode == TImode)
22675 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22676 if (mode == XFmode || mode == TFmode)
22678 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22679 if (REG_P (operand))
22681 gcc_assert (reload_completed);
22682 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22683 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22685 else if (offsettable_memref_p (operand))
22687 operand = adjust_address (operand, DImode, 0);
22688 parts[0] = operand;
22689 parts[1] = adjust_address (operand, upper_mode, 8);
22691 else if (CONST_DOUBLE_P (operand))
22693 REAL_VALUE_TYPE r;
22694 long l[4];
22696 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22697 real_to_target (l, &r, mode);
22699 /* real_to_target puts 32-bit pieces in each long. */
22700 parts[0] =
22701 gen_int_mode
22702 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22703 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22704 DImode);
22706 if (upper_mode == SImode)
22707 parts[1] = gen_int_mode (l[2], SImode);
22708 else
22709 parts[1] =
22710 gen_int_mode
22711 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22712 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22713 DImode);
22715 else
22716 gcc_unreachable ();
22720 return size;
22723 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22724 Return false when normal moves are needed; true when all required
22725 insns have been emitted. Operands 2-4 contain the input values
22726 in the correct order; operands 5-7 contain the output values. */
22728 void
22729 ix86_split_long_move (rtx operands[])
22731 rtx part[2][4];
22732 int nparts, i, j;
22733 int push = 0;
22734 int collisions = 0;
22735 machine_mode mode = GET_MODE (operands[0]);
22736 bool collisionparts[4];
22738 /* The DFmode expanders may ask us to move a double.
22739 For a 64-bit target this is a single move. By hiding the fact
22740 here we simplify the i386.md splitters. */
22741 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22743 /* Optimize constant pool reference to immediates. This is used by
22744 fp moves, that force all constants to memory to allow combining. */
22746 if (MEM_P (operands[1])
22747 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22748 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22749 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22750 if (push_operand (operands[0], VOIDmode))
22752 operands[0] = copy_rtx (operands[0]);
22753 PUT_MODE (operands[0], word_mode);
22755 else
22756 operands[0] = gen_lowpart (DImode, operands[0]);
22757 operands[1] = gen_lowpart (DImode, operands[1]);
22758 emit_move_insn (operands[0], operands[1]);
22759 return;
22762 /* The only non-offsettable memory we handle is push. */
22763 if (push_operand (operands[0], VOIDmode))
22764 push = 1;
22765 else
22766 gcc_assert (!MEM_P (operands[0])
22767 || offsettable_memref_p (operands[0]));
22769 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22770 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22772 /* When emitting push, take care for source operands on the stack. */
22773 if (push && MEM_P (operands[1])
22774 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22776 rtx src_base = XEXP (part[1][nparts - 1], 0);
22778 /* Compensate for the stack decrement by 4. */
22779 if (!TARGET_64BIT && nparts == 3
22780 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22781 src_base = plus_constant (Pmode, src_base, 4);
22783 /* src_base refers to the stack pointer and is
22784 automatically decreased by emitted push. */
22785 for (i = 0; i < nparts; i++)
22786 part[1][i] = change_address (part[1][i],
22787 GET_MODE (part[1][i]), src_base);
22790 /* We need to do copy in the right order in case an address register
22791 of the source overlaps the destination. */
22792 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22794 rtx tmp;
22796 for (i = 0; i < nparts; i++)
22798 collisionparts[i]
22799 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22800 if (collisionparts[i])
22801 collisions++;
22804 /* Collision in the middle part can be handled by reordering. */
22805 if (collisions == 1 && nparts == 3 && collisionparts [1])
22807 std::swap (part[0][1], part[0][2]);
22808 std::swap (part[1][1], part[1][2]);
22810 else if (collisions == 1
22811 && nparts == 4
22812 && (collisionparts [1] || collisionparts [2]))
22814 if (collisionparts [1])
22816 std::swap (part[0][1], part[0][2]);
22817 std::swap (part[1][1], part[1][2]);
22819 else
22821 std::swap (part[0][2], part[0][3]);
22822 std::swap (part[1][2], part[1][3]);
22826 /* If there are more collisions, we can't handle it by reordering.
22827 Do an lea to the last part and use only one colliding move. */
22828 else if (collisions > 1)
22830 rtx base;
22832 collisions = 1;
22834 base = part[0][nparts - 1];
22836 /* Handle the case when the last part isn't valid for lea.
22837 Happens in 64-bit mode storing the 12-byte XFmode. */
22838 if (GET_MODE (base) != Pmode)
22839 base = gen_rtx_REG (Pmode, REGNO (base));
22841 emit_insn (gen_rtx_SET (base, XEXP (part[1][0], 0)));
22842 part[1][0] = replace_equiv_address (part[1][0], base);
22843 for (i = 1; i < nparts; i++)
22845 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22846 part[1][i] = replace_equiv_address (part[1][i], tmp);
22851 if (push)
22853 if (!TARGET_64BIT)
22855 if (nparts == 3)
22857 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22858 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22859 stack_pointer_rtx, GEN_INT (-4)));
22860 emit_move_insn (part[0][2], part[1][2]);
22862 else if (nparts == 4)
22864 emit_move_insn (part[0][3], part[1][3]);
22865 emit_move_insn (part[0][2], part[1][2]);
22868 else
22870 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22871 a register, it is OK - we will just use the larger counterpart. We also
22872 retype memory - this comes from an attempt to avoid a REX prefix on
22873 the move of the second half of a TFmode value. */
22874 if (GET_MODE (part[1][1]) == SImode)
22876 switch (GET_CODE (part[1][1]))
22878 case MEM:
22879 part[1][1] = adjust_address (part[1][1], DImode, 0);
22880 break;
22882 case REG:
22883 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22884 break;
22886 default:
22887 gcc_unreachable ();
22890 if (GET_MODE (part[1][0]) == SImode)
22891 part[1][0] = part[1][1];
22894 emit_move_insn (part[0][1], part[1][1]);
22895 emit_move_insn (part[0][0], part[1][0]);
22896 return;
22899 /* Choose correct order to not overwrite the source before it is copied. */
22900 if ((REG_P (part[0][0])
22901 && REG_P (part[1][1])
22902 && (REGNO (part[0][0]) == REGNO (part[1][1])
22903 || (nparts == 3
22904 && REGNO (part[0][0]) == REGNO (part[1][2]))
22905 || (nparts == 4
22906 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22907 || (collisions > 0
22908 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22910 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22912 operands[2 + i] = part[0][j];
22913 operands[6 + i] = part[1][j];
22916 else
22918 for (i = 0; i < nparts; i++)
22920 operands[2 + i] = part[0][i];
22921 operands[6 + i] = part[1][i];
22925 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22926 if (optimize_insn_for_size_p ())
22928 for (j = 0; j < nparts - 1; j++)
22929 if (CONST_INT_P (operands[6 + j])
22930 && operands[6 + j] != const0_rtx
22931 && REG_P (operands[2 + j]))
22932 for (i = j; i < nparts - 1; i++)
22933 if (CONST_INT_P (operands[7 + i])
22934 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22935 operands[7 + i] = operands[2 + j];
22938 for (i = 0; i < nparts; i++)
22939 emit_move_insn (operands[2 + i], operands[6 + i]);
22941 return;
22944 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22945 left shift by a constant, either using a single shift or
22946 a sequence of add instructions. */
22948 static void
22949 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
22951 rtx (*insn)(rtx, rtx, rtx);
22953 if (count == 1
22954 || (count * ix86_cost->add <= ix86_cost->shift_const
22955 && !optimize_insn_for_size_p ()))
22957 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22958 while (count-- > 0)
22959 emit_insn (insn (operand, operand, operand));
22961 else
22963 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22964 emit_insn (insn (operand, operand, GEN_INT (count)));
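/* The add sequence above exploits the identity x << 1 == x + x, so a
   small constant shift can be open-coded as repeated additions whenever
   COUNT times the add cost does not exceed the cost of one constant
   shift.  A hypothetical C sketch of the idea (not code from this file):

     static unsigned int shl_by_adds (unsigned int x, int count)
     {
       // assumes 0 <= count < 32; each addition doubles the value
       while (count-- > 0)
         x = x + x;
       return x;
     }
*/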
22968 void
22969 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
22971 rtx (*gen_ashl3)(rtx, rtx, rtx);
22972 rtx (*gen_shld)(rtx, rtx, rtx);
22973 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22975 rtx low[2], high[2];
22976 int count;
22978 if (CONST_INT_P (operands[2]))
22980 split_double_mode (mode, operands, 2, low, high);
22981 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22983 if (count >= half_width)
22985 emit_move_insn (high[0], low[1]);
22986 emit_move_insn (low[0], const0_rtx);
22988 if (count > half_width)
22989 ix86_expand_ashl_const (high[0], count - half_width, mode);
22991 else
22993 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22995 if (!rtx_equal_p (operands[0], operands[1]))
22996 emit_move_insn (operands[0], operands[1]);
22998 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22999 ix86_expand_ashl_const (low[0], count, mode);
23001 return;
23004 split_double_mode (mode, operands, 1, low, high);
23006 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23008 if (operands[1] == const1_rtx)
23010 /* Assuming we've chosen QImode-capable registers, 1 << N
23011 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23012 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23014 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23016 ix86_expand_clear (low[0]);
23017 ix86_expand_clear (high[0]);
23018 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23020 d = gen_lowpart (QImode, low[0]);
23021 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23022 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23023 emit_insn (gen_rtx_SET (d, s));
23025 d = gen_lowpart (QImode, high[0]);
23026 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23027 s = gen_rtx_NE (QImode, flags, const0_rtx);
23028 emit_insn (gen_rtx_SET (d, s));
23031 /* Otherwise, we can get the same results by manually performing
23032 a bit extract operation on bit 5/6, and then performing the two
23033 shifts. The two methods of getting 0/1 into low/high are exactly
23034 the same size. Avoiding the shift in the bit extract case helps
23035 pentium4 a bit; no one else seems to care much either way. */
23036 else
23038 machine_mode half_mode;
23039 rtx (*gen_lshr3)(rtx, rtx, rtx);
23040 rtx (*gen_and3)(rtx, rtx, rtx);
23041 rtx (*gen_xor3)(rtx, rtx, rtx);
23042 HOST_WIDE_INT bits;
23043 rtx x;
23045 if (mode == DImode)
23047 half_mode = SImode;
23048 gen_lshr3 = gen_lshrsi3;
23049 gen_and3 = gen_andsi3;
23050 gen_xor3 = gen_xorsi3;
23051 bits = 5;
23053 else
23055 half_mode = DImode;
23056 gen_lshr3 = gen_lshrdi3;
23057 gen_and3 = gen_anddi3;
23058 gen_xor3 = gen_xordi3;
23059 bits = 6;
23062 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23063 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23064 else
23065 x = gen_lowpart (half_mode, operands[2]);
23066 emit_insn (gen_rtx_SET (high[0], x));
23068 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23069 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23070 emit_move_insn (low[0], high[0]);
23071 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23074 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23075 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23076 return;
23079 if (operands[1] == constm1_rtx)
23081 /* For -1 << N, we can avoid the shld instruction, because we
23082 know that we're shifting 0...31/63 ones into a -1. */
23083 emit_move_insn (low[0], constm1_rtx);
23084 if (optimize_insn_for_size_p ())
23085 emit_move_insn (high[0], low[0]);
23086 else
23087 emit_move_insn (high[0], constm1_rtx);
23089 else
23091 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23093 if (!rtx_equal_p (operands[0], operands[1]))
23094 emit_move_insn (operands[0], operands[1]);
23096 split_double_mode (mode, operands, 1, low, high);
23097 emit_insn (gen_shld (high[0], low[0], operands[2]));
23100 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23102 if (TARGET_CMOVE && scratch)
23104 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23105 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23107 ix86_expand_clear (scratch);
23108 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23110 else
23112 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23113 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23115 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
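/* For reference, the constant-count SHLD path above computes what the
   following hypothetical C sketch does for a DImode shift split into
   SImode halves (not code from this file; assumes 0 < count < 32):

     #include <stdint.h>

     static void shl64_split (uint32_t *hi, uint32_t *lo, unsigned count)
     {
       *hi = (*hi << count) | (*lo >> (32 - count));
       *lo = *lo << count;
     }

   For count >= 32 the high half simply becomes the low half shifted by
   count - 32 and the low half becomes zero, which is what the
   count >= half_width branch emits.  The variable-count path needs the
   extra gen_x86_shift_adj_* fixup because the hardware masks the shift
   count to the operand width.  */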
23119 void
23120 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23122 rtx (*gen_ashr3)(rtx, rtx, rtx)
23123 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23124 rtx (*gen_shrd)(rtx, rtx, rtx);
23125 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23127 rtx low[2], high[2];
23128 int count;
23130 if (CONST_INT_P (operands[2]))
23132 split_double_mode (mode, operands, 2, low, high);
23133 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23135 if (count == GET_MODE_BITSIZE (mode) - 1)
23137 emit_move_insn (high[0], high[1]);
23138 emit_insn (gen_ashr3 (high[0], high[0],
23139 GEN_INT (half_width - 1)));
23140 emit_move_insn (low[0], high[0]);
23143 else if (count >= half_width)
23145 emit_move_insn (low[0], high[1]);
23146 emit_move_insn (high[0], low[0]);
23147 emit_insn (gen_ashr3 (high[0], high[0],
23148 GEN_INT (half_width - 1)));
23150 if (count > half_width)
23151 emit_insn (gen_ashr3 (low[0], low[0],
23152 GEN_INT (count - half_width)));
23154 else
23156 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23158 if (!rtx_equal_p (operands[0], operands[1]))
23159 emit_move_insn (operands[0], operands[1]);
23161 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23162 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23165 else
23167 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23169 if (!rtx_equal_p (operands[0], operands[1]))
23170 emit_move_insn (operands[0], operands[1]);
23172 split_double_mode (mode, operands, 1, low, high);
23174 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23175 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23177 if (TARGET_CMOVE && scratch)
23179 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23180 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23182 emit_move_insn (scratch, high[0]);
23183 emit_insn (gen_ashr3 (scratch, scratch,
23184 GEN_INT (half_width - 1)));
23185 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23186 scratch));
23188 else
23190 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23191 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23193 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23198 void
23199 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23201 rtx (*gen_lshr3)(rtx, rtx, rtx)
23202 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23203 rtx (*gen_shrd)(rtx, rtx, rtx);
23204 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23206 rtx low[2], high[2];
23207 int count;
23209 if (CONST_INT_P (operands[2]))
23211 split_double_mode (mode, operands, 2, low, high);
23212 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23214 if (count >= half_width)
23216 emit_move_insn (low[0], high[1]);
23217 ix86_expand_clear (high[0]);
23219 if (count > half_width)
23220 emit_insn (gen_lshr3 (low[0], low[0],
23221 GEN_INT (count - half_width)));
23223 else
23225 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23227 if (!rtx_equal_p (operands[0], operands[1]))
23228 emit_move_insn (operands[0], operands[1]);
23230 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23231 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23234 else
23236 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23238 if (!rtx_equal_p (operands[0], operands[1]))
23239 emit_move_insn (operands[0], operands[1]);
23241 split_double_mode (mode, operands, 1, low, high);
23243 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23244 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23246 if (TARGET_CMOVE && scratch)
23248 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23249 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23251 ix86_expand_clear (scratch);
23252 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23253 scratch));
23255 else
23257 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23258 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23260 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
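/* The constant-count logical right shift above is, in effect (a
   hypothetical C sketch, not code from this file; assumes the count is
   a constant with 0 < count < 64):

     #include <stdint.h>

     static void lshr64_split (uint32_t *hi, uint32_t *lo, unsigned count)
     {
       if (count >= 32)
         {
           *lo = *hi >> (count - 32);
           *hi = 0;
         }
       else
         {
           *lo = (*lo >> count) | (*hi << (32 - count));
           *hi = *hi >> count;
         }
     }
*/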
23265 /* Predict just emitted jump instruction to be taken with probability PROB. */
23266 static void
23267 predict_jump (int prob)
23269 rtx insn = get_last_insn ();
23270 gcc_assert (JUMP_P (insn));
23271 add_int_reg_note (insn, REG_BR_PROB, prob);
23274 /* Helper function for the string operations below. Test whether VARIABLE
23275 is aligned to VALUE bytes. If true, jump to the label. */
23276 static rtx_code_label *
23277 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23279 rtx_code_label *label = gen_label_rtx ();
23280 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23281 if (GET_MODE (variable) == DImode)
23282 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23283 else
23284 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23285 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23286 1, label);
23287 if (epilogue)
23288 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23289 else
23290 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23291 return label;
23294 /* Decrease COUNTREG by VALUE. */
23295 static void
23296 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23298 rtx (*gen_add)(rtx, rtx, rtx)
23299 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23301 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23304 /* Zero extend possibly SImode EXP to Pmode register. */
23306 ix86_zero_extend_to_Pmode (rtx exp)
23308 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23311 /* Divide COUNTREG by SCALE. */
23312 static rtx
23313 scale_counter (rtx countreg, int scale)
23315 rtx sc;
23317 if (scale == 1)
23318 return countreg;
23319 if (CONST_INT_P (countreg))
23320 return GEN_INT (INTVAL (countreg) / scale);
23321 gcc_assert (REG_P (countreg));
23323 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23324 GEN_INT (exact_log2 (scale)),
23325 NULL, 1, OPTAB_DIRECT);
23326 return sc;
23329 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23330 DImode for constant loop counts. */
23332 static machine_mode
23333 counter_mode (rtx count_exp)
23335 if (GET_MODE (count_exp) != VOIDmode)
23336 return GET_MODE (count_exp);
23337 if (!CONST_INT_P (count_exp))
23338 return Pmode;
23339 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23340 return DImode;
23341 return SImode;
23344 /* Copy the address to a Pmode register. This is used for x32 to
23345 truncate DImode TLS address to a SImode register. */
23347 static rtx
23348 ix86_copy_addr_to_reg (rtx addr)
23350 rtx reg;
23351 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23353 reg = copy_addr_to_reg (addr);
23354 REG_POINTER (reg) = 1;
23355 return reg;
23357 else
23359 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23360 reg = copy_to_mode_reg (DImode, addr);
23361 REG_POINTER (reg) = 1;
23362 return gen_rtx_SUBREG (SImode, reg, 0);
23366 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23367 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23368 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23369 memory with VALUE (supposed to be in MODE).
23371 The size is rounded down to a whole number of chunks moved at once.
23372 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
23375 static void
23376 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23377 rtx destptr, rtx srcptr, rtx value,
23378 rtx count, machine_mode mode, int unroll,
23379 int expected_size, bool issetmem)
23381 rtx_code_label *out_label, *top_label;
23382 rtx iter, tmp;
23383 machine_mode iter_mode = counter_mode (count);
23384 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23385 rtx piece_size = GEN_INT (piece_size_n);
23386 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23387 rtx size;
23388 int i;
23390 top_label = gen_label_rtx ();
23391 out_label = gen_label_rtx ();
23392 iter = gen_reg_rtx (iter_mode);
23394 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23395 NULL, 1, OPTAB_DIRECT);
23396 /* Those two should combine. */
23397 if (piece_size == const1_rtx)
23399 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23400 true, out_label);
23401 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23403 emit_move_insn (iter, const0_rtx);
23405 emit_label (top_label);
23407 tmp = convert_modes (Pmode, iter_mode, iter, true);
23409 /* This assert could be relaxed - in that case we'll need to compute
23410 the smallest power of two containing PIECE_SIZE_N and pass it to
23411 offset_address. */
23412 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23413 destmem = offset_address (destmem, tmp, piece_size_n);
23414 destmem = adjust_address (destmem, mode, 0);
23416 if (!issetmem)
23418 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23419 srcmem = adjust_address (srcmem, mode, 0);
23421 /* When unrolling for chips that reorder memory reads and writes,
23422 we can save registers by using a single temporary.
23423 Also, using 4 temporaries is overkill in 32-bit mode. */
23424 if (!TARGET_64BIT && 0)
23426 for (i = 0; i < unroll; i++)
23428 if (i)
23430 destmem =
23431 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23432 srcmem =
23433 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23435 emit_move_insn (destmem, srcmem);
23438 else
23440 rtx tmpreg[4];
23441 gcc_assert (unroll <= 4);
23442 for (i = 0; i < unroll; i++)
23444 tmpreg[i] = gen_reg_rtx (mode);
23445 if (i)
23447 srcmem =
23448 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23450 emit_move_insn (tmpreg[i], srcmem);
23452 for (i = 0; i < unroll; i++)
23454 if (i)
23456 destmem =
23457 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23459 emit_move_insn (destmem, tmpreg[i]);
23463 else
23464 for (i = 0; i < unroll; i++)
23466 if (i)
23467 destmem =
23468 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23469 emit_move_insn (destmem, value);
23472 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23473 true, OPTAB_LIB_WIDEN);
23474 if (tmp != iter)
23475 emit_move_insn (iter, tmp);
23477 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23478 true, top_label);
23479 if (expected_size != -1)
23481 expected_size /= GET_MODE_SIZE (mode) * unroll;
23482 if (expected_size == 0)
23483 predict_jump (0);
23484 else if (expected_size > REG_BR_PROB_BASE)
23485 predict_jump (REG_BR_PROB_BASE - 1);
23486 else
23487 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23489 else
23490 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23491 iter = ix86_zero_extend_to_Pmode (iter);
23492 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23493 true, OPTAB_LIB_WIDEN);
23494 if (tmp != destptr)
23495 emit_move_insn (destptr, tmp);
23496 if (!issetmem)
23498 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23499 true, OPTAB_LIB_WIDEN);
23500 if (tmp != srcptr)
23501 emit_move_insn (srcptr, tmp);
23503 emit_label (out_label);
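/* The emitted control flow corresponds roughly to the following
   hypothetical C sketch for the memcpy case (not code from this file;
   CHUNK stands for GET_MODE_SIZE (MODE) * UNROLL and is assumed to be a
   power of two):

     #include <stddef.h>

     static void copy_via_loop (char *dst, const char *src,
                                size_t count, size_t chunk)
     {
       size_t size = count & ~(chunk - 1);   // main loop size, rounded down
       for (size_t iter = 0; iter < size; iter += chunk)
         for (size_t j = 0; j < chunk; j++)  // the unrolled body
           dst[iter + j] = src[iter + j];
       // DESTPTR/SRCPTR are then advanced by SIZE; the epilogue code
       // handles the remaining count - size bytes.
     }
*/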
23506 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23507 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23508 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23509 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23510 ORIG_VALUE is the original value passed to memset to fill the memory with.
23511 Other arguments have same meaning as for previous function. */
23513 static void
23514 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23515 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23516 rtx count,
23517 machine_mode mode, bool issetmem)
23519 rtx destexp;
23520 rtx srcexp;
23521 rtx countreg;
23522 HOST_WIDE_INT rounded_count;
23524 /* If possible, it is shorter to use rep movs.
23525 TODO: Maybe it is better to move this logic to decide_alg. */
23526 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23527 && (!issetmem || orig_value == const0_rtx))
23528 mode = SImode;
23530 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23531 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23533 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23534 GET_MODE_SIZE (mode)));
23535 if (mode != QImode)
23537 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23538 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23539 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23541 else
23542 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23543 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23545 rounded_count = (INTVAL (count)
23546 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23547 destmem = shallow_copy_rtx (destmem);
23548 set_mem_size (destmem, rounded_count);
23550 else if (MEM_SIZE_KNOWN_P (destmem))
23551 clear_mem_size (destmem);
23553 if (issetmem)
23555 value = force_reg (mode, gen_lowpart (mode, value));
23556 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23558 else
23560 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23561 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23562 if (mode != QImode)
23564 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23565 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23566 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23568 else
23569 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23570 if (CONST_INT_P (count))
23572 rounded_count = (INTVAL (count)
23573 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23574 srcmem = shallow_copy_rtx (srcmem);
23575 set_mem_size (srcmem, rounded_count);
23577 else
23579 if (MEM_SIZE_KNOWN_P (srcmem))
23580 clear_mem_size (srcmem);
23582 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23583 destexp, srcexp));
23587 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23588 DESTMEM.
23589 SRCMEM is passed by pointer so it can be updated on return.
23590 The return value is the updated DESTMEM. */
23591 static rtx
23592 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23593 HOST_WIDE_INT size_to_move)
23595 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23596 enum insn_code code;
23597 machine_mode move_mode;
23598 int piece_size, i;
23600 /* Find the widest mode in which we could perform moves.
23601 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23602 it until a move of that size is supported. */
23603 piece_size = 1 << floor_log2 (size_to_move);
23604 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23605 code = optab_handler (mov_optab, move_mode);
23606 while (code == CODE_FOR_nothing && piece_size > 1)
23608 piece_size >>= 1;
23609 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23610 code = optab_handler (mov_optab, move_mode);
23613 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23614 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23615 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23617 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23618 move_mode = mode_for_vector (word_mode, nunits);
23619 code = optab_handler (mov_optab, move_mode);
23620 if (code == CODE_FOR_nothing)
23622 move_mode = word_mode;
23623 piece_size = GET_MODE_SIZE (move_mode);
23624 code = optab_handler (mov_optab, move_mode);
23627 gcc_assert (code != CODE_FOR_nothing);
23629 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23630 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23632 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23633 gcc_assert (size_to_move % piece_size == 0);
23634 adjust = GEN_INT (piece_size);
23635 for (i = 0; i < size_to_move; i += piece_size)
23637 /* We move from memory to memory, so we'll need to do it via
23638 a temporary register. */
23639 tempreg = gen_reg_rtx (move_mode);
23640 emit_insn (GEN_FCN (code) (tempreg, src));
23641 emit_insn (GEN_FCN (code) (dst, tempreg));
23643 emit_move_insn (destptr,
23644 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23645 emit_move_insn (srcptr,
23646 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23648 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23649 piece_size);
23650 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23651 piece_size);
23654 /* Update DST and SRC rtx. */
23655 *srcmem = src;
23656 return dst;
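/* The piece-size selection above can be pictured as the following
   hypothetical C sketch (not code from this file; MAX_SUPPORTED stands
   for the widest move the target handles in one instruction):

     static int choose_piece_size (int size_to_move, int max_supported)
     {
       int piece = 1;
       while (piece * 2 <= size_to_move && piece * 2 <= max_supported)
         piece *= 2;
       return piece;   // largest power of two usable for the moves
     }
*/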
23659 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23660 static void
23661 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23662 rtx destptr, rtx srcptr, rtx count, int max_size)
23664 rtx src, dest;
23665 if (CONST_INT_P (count))
23667 HOST_WIDE_INT countval = INTVAL (count);
23668 HOST_WIDE_INT epilogue_size = countval % max_size;
23669 int i;
23671 /* For now MAX_SIZE should be a power of 2. This assert could be
23672 relaxed, but it'll require a bit more complicated epilogue
23673 expanding. */
23674 gcc_assert ((max_size & (max_size - 1)) == 0);
23675 for (i = max_size; i >= 1; i >>= 1)
23677 if (epilogue_size & i)
23678 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23680 return;
23682 if (max_size > 8)
23684 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23685 count, 1, OPTAB_DIRECT);
23686 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23687 count, QImode, 1, 4, false);
23688 return;
23691 /* When there are stringops, we can cheaply increase dest and src pointers.
23692 Otherwise we save code size by maintaining offset (zero is readily
23693 available from preceding rep operation) and using x86 addressing modes.
23695 if (TARGET_SINGLE_STRINGOP)
23697 if (max_size > 4)
23699 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23700 src = change_address (srcmem, SImode, srcptr);
23701 dest = change_address (destmem, SImode, destptr);
23702 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23703 emit_label (label);
23704 LABEL_NUSES (label) = 1;
23706 if (max_size > 2)
23708 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23709 src = change_address (srcmem, HImode, srcptr);
23710 dest = change_address (destmem, HImode, destptr);
23711 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23712 emit_label (label);
23713 LABEL_NUSES (label) = 1;
23715 if (max_size > 1)
23717 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23718 src = change_address (srcmem, QImode, srcptr);
23719 dest = change_address (destmem, QImode, destptr);
23720 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23721 emit_label (label);
23722 LABEL_NUSES (label) = 1;
23725 else
23727 rtx offset = force_reg (Pmode, const0_rtx);
23728 rtx tmp;
23730 if (max_size > 4)
23732 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23733 src = change_address (srcmem, SImode, srcptr);
23734 dest = change_address (destmem, SImode, destptr);
23735 emit_move_insn (dest, src);
23736 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23737 true, OPTAB_LIB_WIDEN);
23738 if (tmp != offset)
23739 emit_move_insn (offset, tmp);
23740 emit_label (label);
23741 LABEL_NUSES (label) = 1;
23743 if (max_size > 2)
23745 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23746 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23747 src = change_address (srcmem, HImode, tmp);
23748 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23749 dest = change_address (destmem, HImode, tmp);
23750 emit_move_insn (dest, src);
23751 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23752 true, OPTAB_LIB_WIDEN);
23753 if (tmp != offset)
23754 emit_move_insn (offset, tmp);
23755 emit_label (label);
23756 LABEL_NUSES (label) = 1;
23758 if (max_size > 1)
23760 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23761 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23762 src = change_address (srcmem, QImode, tmp);
23763 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23764 dest = change_address (destmem, QImode, tmp);
23765 emit_move_insn (dest, src);
23766 emit_label (label);
23767 LABEL_NUSES (label) = 1;
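/* For a constant COUNT the epilogue above walks the residual byte count
   bit by bit, emitting one power-of-two sized copy per set bit.  A
   hypothetical C sketch (not code from this file; MAX_SIZE is assumed to
   be a power of two):

     static void copy_tail (char *dst, const char *src,
                            unsigned tail, unsigned max_size)
     {
       // tail == countval % max_size
       for (unsigned i = max_size; i >= 1; i >>= 1)
         if (tail & i)
           for (unsigned j = 0; j < i; j++)
             *dst++ = *src++;
     }
*/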
23772 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23773 with value PROMOTED_VAL.
23775 The return value is the updated DESTMEM. */
23776 static rtx
23777 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23778 HOST_WIDE_INT size_to_move)
23780 rtx dst = destmem, adjust;
23781 enum insn_code code;
23782 machine_mode move_mode;
23783 int piece_size, i;
23785 /* Find the widest mode in which we could perform moves.
23786 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23787 it until a move of that size is supported. */
23788 move_mode = GET_MODE (promoted_val);
23789 if (move_mode == VOIDmode)
23790 move_mode = QImode;
23791 if (size_to_move < GET_MODE_SIZE (move_mode))
23793 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23794 promoted_val = gen_lowpart (move_mode, promoted_val);
23796 piece_size = GET_MODE_SIZE (move_mode);
23797 code = optab_handler (mov_optab, move_mode);
23798 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23800 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23802 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23803 gcc_assert (size_to_move % piece_size == 0);
23804 adjust = GEN_INT (piece_size);
23805 for (i = 0; i < size_to_move; i += piece_size)
23807 if (piece_size <= GET_MODE_SIZE (word_mode))
23809 emit_insn (gen_strset (destptr, dst, promoted_val));
23810 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23811 piece_size);
23812 continue;
23815 emit_insn (GEN_FCN (code) (dst, promoted_val));
23817 emit_move_insn (destptr,
23818 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23820 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23821 piece_size);
23824 /* Update DST rtx. */
23825 return dst;
23827 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23828 static void
23829 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23830 rtx count, int max_size)
23832 count =
23833 expand_simple_binop (counter_mode (count), AND, count,
23834 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23835 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23836 gen_lowpart (QImode, value), count, QImode,
23837 1, max_size / 2, true);
23840 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23841 static void
23842 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23843 rtx count, int max_size)
23845 rtx dest;
23847 if (CONST_INT_P (count))
23849 HOST_WIDE_INT countval = INTVAL (count);
23850 HOST_WIDE_INT epilogue_size = countval % max_size;
23851 int i;
23853 /* For now MAX_SIZE should be a power of 2. This assert could be
23854 relaxed, but it'll require a bit more complicated epilogue
23855 expanding. */
23856 gcc_assert ((max_size & (max_size - 1)) == 0);
23857 for (i = max_size; i >= 1; i >>= 1)
23859 if (epilogue_size & i)
23861 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23862 destmem = emit_memset (destmem, destptr, vec_value, i);
23863 else
23864 destmem = emit_memset (destmem, destptr, value, i);
23867 return;
23869 if (max_size > 32)
23871 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23872 return;
23874 if (max_size > 16)
23876 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23877 if (TARGET_64BIT)
23879 dest = change_address (destmem, DImode, destptr);
23880 emit_insn (gen_strset (destptr, dest, value));
23881 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23882 emit_insn (gen_strset (destptr, dest, value));
23884 else
23886 dest = change_address (destmem, SImode, destptr);
23887 emit_insn (gen_strset (destptr, dest, value));
23888 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23889 emit_insn (gen_strset (destptr, dest, value));
23890 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23891 emit_insn (gen_strset (destptr, dest, value));
23892 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23893 emit_insn (gen_strset (destptr, dest, value));
23895 emit_label (label);
23896 LABEL_NUSES (label) = 1;
23898 if (max_size > 8)
23900 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23901 if (TARGET_64BIT)
23903 dest = change_address (destmem, DImode, destptr);
23904 emit_insn (gen_strset (destptr, dest, value));
23906 else
23908 dest = change_address (destmem, SImode, destptr);
23909 emit_insn (gen_strset (destptr, dest, value));
23910 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23911 emit_insn (gen_strset (destptr, dest, value));
23913 emit_label (label);
23914 LABEL_NUSES (label) = 1;
23916 if (max_size > 4)
23918 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23919 dest = change_address (destmem, SImode, destptr);
23920 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23921 emit_label (label);
23922 LABEL_NUSES (label) = 1;
23924 if (max_size > 2)
23926 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23927 dest = change_address (destmem, HImode, destptr);
23928 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23929 emit_label (label);
23930 LABEL_NUSES (label) = 1;
23932 if (max_size > 1)
23934 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23935 dest = change_address (destmem, QImode, destptr);
23936 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23937 emit_label (label);
23938 LABEL_NUSES (label) = 1;
23942 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM, or store enough
23943 bytes into DESTMEM, to align it to DESIRED_ALIGNMENT. The original alignment is ALIGN.
23944 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23945 ignored.
23946 Return value is updated DESTMEM. */
23947 static rtx
23948 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23949 rtx destptr, rtx srcptr, rtx value,
23950 rtx vec_value, rtx count, int align,
23951 int desired_alignment, bool issetmem)
23953 int i;
23954 for (i = 1; i < desired_alignment; i <<= 1)
23956 if (align <= i)
23958 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23959 if (issetmem)
23961 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23962 destmem = emit_memset (destmem, destptr, vec_value, i);
23963 else
23964 destmem = emit_memset (destmem, destptr, value, i);
23966 else
23967 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23968 ix86_adjust_counter (count, i);
23969 emit_label (label);
23970 LABEL_NUSES (label) = 1;
23971 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23974 return destmem;
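/* The prologue above peels off 1, 2, 4, ... byte operations until the
   destination reaches DESIRED_ALIGNMENT.  For the memset case this is
   roughly (a hypothetical C sketch, not code from this file;
   DESIRED_ALIGN is assumed to be a power of two and COUNT large enough):

     #include <stdint.h>
     #include <stddef.h>

     static unsigned char *align_dest (unsigned char *dst, size_t *count,
                                       int value, int desired_align)
     {
       for (int i = 1; i < desired_align; i <<= 1)
         if ((uintptr_t) dst & i)
           {
             for (int j = 0; j < i; j++)
               *dst++ = (unsigned char) value;
             *count -= i;
           }
       return dst;
     }
*/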
23977 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23978 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23979 and jump to DONE_LABEL. */
23980 static void
23981 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23982 rtx destptr, rtx srcptr,
23983 rtx value, rtx vec_value,
23984 rtx count, int size,
23985 rtx done_label, bool issetmem)
23987 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23988 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23989 rtx modesize;
23990 int n;
23992 /* If we do not have vector value to copy, we must reduce size. */
23993 if (issetmem)
23995 if (!vec_value)
23997 if (GET_MODE (value) == VOIDmode && size > 8)
23998 mode = Pmode;
23999 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24000 mode = GET_MODE (value);
24002 else
24003 mode = GET_MODE (vec_value), value = vec_value;
24005 else
24007 /* Choose appropriate vector mode. */
24008 if (size >= 32)
24009 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24010 else if (size >= 16)
24011 mode = TARGET_SSE ? V16QImode : DImode;
24012 srcmem = change_address (srcmem, mode, srcptr);
24014 destmem = change_address (destmem, mode, destptr);
24015 modesize = GEN_INT (GET_MODE_SIZE (mode));
24016 gcc_assert (GET_MODE_SIZE (mode) <= size);
24017 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24019 if (issetmem)
24020 emit_move_insn (destmem, gen_lowpart (mode, value));
24021 else
24023 emit_move_insn (destmem, srcmem);
24024 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24026 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24029 destmem = offset_address (destmem, count, 1);
24030 destmem = offset_address (destmem, GEN_INT (-2 * size),
24031 GET_MODE_SIZE (mode));
24032 if (!issetmem)
24034 srcmem = offset_address (srcmem, count, 1);
24035 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24036 GET_MODE_SIZE (mode));
24038 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24040 if (issetmem)
24041 emit_move_insn (destmem, gen_lowpart (mode, value));
24042 else
24044 emit_move_insn (destmem, srcmem);
24045 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24047 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24049 emit_jump_insn (gen_jump (done_label));
24050 emit_barrier ();
24052 emit_label (label);
24053 LABEL_NUSES (label) = 1;
24056 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24057 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24058 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that lets us
24059 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24060 DONE_LABEL is a label after the whole copying sequence. The label is created
24061 on demand if *DONE_LABEL is NULL.
24062 MIN_SIZE is the minimal size of the copied block. This value gets adjusted for the new
24063 bounds after the initial copies.
24065 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24066 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24067 we will dispatch to a library call for large blocks.
24069 In pseudocode we do:
24071 if (COUNT < SIZE)
24073 Assume that SIZE is 4. Bigger sizes are handled analogously
24074 if (COUNT & 4)
24076 copy 4 bytes from SRCPTR to DESTPTR
24077 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24078 goto done_label
24080 if (!COUNT)
24081 goto done_label;
24082 copy 1 byte from SRCPTR to DESTPTR
24083 if (COUNT & 2)
24085 copy 2 bytes from SRCPTR to DESTPTR
24086 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24089 else
24091 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24092 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24094 OLD_DESTPTR = DESTPTR;
24095 Align DESTPTR up to DESIRED_ALIGN
24096 SRCPTR += DESTPTR - OLD_DESTPTR
24097 COUNT -= DESTPTR - OLD_DESTPTR
24098 if (DYNAMIC_CHECK)
24099 Round COUNT down to multiple of SIZE
24100 << optional caller supplied zero size guard is here >>
24101 << optional caller supplied dynamic check is here >>
24102 << caller supplied main copy loop is here >>
24104 done_label:
24106 static void
24107 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24108 rtx *destptr, rtx *srcptr,
24109 machine_mode mode,
24110 rtx value, rtx vec_value,
24111 rtx *count,
24112 rtx_code_label **done_label,
24113 int size,
24114 int desired_align,
24115 int align,
24116 unsigned HOST_WIDE_INT *min_size,
24117 bool dynamic_check,
24118 bool issetmem)
24120 rtx_code_label *loop_label = NULL, *label;
24121 int n;
24122 rtx modesize;
24123 int prolog_size = 0;
24124 rtx mode_value;
24126 /* Choose the proper value to copy. */
24127 if (issetmem && VECTOR_MODE_P (mode))
24128 mode_value = vec_value;
24129 else
24130 mode_value = value;
24131 gcc_assert (GET_MODE_SIZE (mode) <= size);
24133 /* See if block is big or small, handle small blocks. */
24134 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24136 int size2 = size;
24137 loop_label = gen_label_rtx ();
24139 if (!*done_label)
24140 *done_label = gen_label_rtx ();
24142 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24143 1, loop_label);
24144 size2 >>= 1;
24146 /* Handle sizes > 3. */
24147 for (;size2 > 2; size2 >>= 1)
24148 expand_small_movmem_or_setmem (destmem, srcmem,
24149 *destptr, *srcptr,
24150 value, vec_value,
24151 *count,
24152 size2, *done_label, issetmem);
24153 /* Nothing to copy? Jump to DONE_LABEL if so */
24154 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24155 1, *done_label);
24157 /* Do a byte copy. */
24158 destmem = change_address (destmem, QImode, *destptr);
24159 if (issetmem)
24160 emit_move_insn (destmem, gen_lowpart (QImode, value));
24161 else
24163 srcmem = change_address (srcmem, QImode, *srcptr);
24164 emit_move_insn (destmem, srcmem);
24167 /* Handle sizes 2 and 3. */
24168 label = ix86_expand_aligntest (*count, 2, false);
24169 destmem = change_address (destmem, HImode, *destptr);
24170 destmem = offset_address (destmem, *count, 1);
24171 destmem = offset_address (destmem, GEN_INT (-2), 2);
24172 if (issetmem)
24173 emit_move_insn (destmem, gen_lowpart (HImode, value));
24174 else
24176 srcmem = change_address (srcmem, HImode, *srcptr);
24177 srcmem = offset_address (srcmem, *count, 1);
24178 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24179 emit_move_insn (destmem, srcmem);
24182 emit_label (label);
24183 LABEL_NUSES (label) = 1;
24184 emit_jump_insn (gen_jump (*done_label));
24185 emit_barrier ();
24187 else
24188 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24189 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24191 /* Start memcpy for COUNT >= SIZE. */
24192 if (loop_label)
24194 emit_label (loop_label);
24195 LABEL_NUSES (loop_label) = 1;
24198 /* Copy first desired_align bytes. */
24199 if (!issetmem)
24200 srcmem = change_address (srcmem, mode, *srcptr);
24201 destmem = change_address (destmem, mode, *destptr);
24202 modesize = GEN_INT (GET_MODE_SIZE (mode));
24203 for (n = 0; prolog_size < desired_align - align; n++)
24205 if (issetmem)
24206 emit_move_insn (destmem, mode_value);
24207 else
24209 emit_move_insn (destmem, srcmem);
24210 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24212 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24213 prolog_size += GET_MODE_SIZE (mode);
24217 /* Copy last SIZE bytes. */
24218 destmem = offset_address (destmem, *count, 1);
24219 destmem = offset_address (destmem,
24220 GEN_INT (-size - prolog_size),
24222 if (issetmem)
24223 emit_move_insn (destmem, mode_value);
24224 else
24226 srcmem = offset_address (srcmem, *count, 1);
24227 srcmem = offset_address (srcmem,
24228 GEN_INT (-size - prolog_size),
24230 emit_move_insn (destmem, srcmem);
24232 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24234 destmem = offset_address (destmem, modesize, 1);
24235 if (issetmem)
24236 emit_move_insn (destmem, mode_value);
24237 else
24239 srcmem = offset_address (srcmem, modesize, 1);
24240 emit_move_insn (destmem, srcmem);
24244 /* Align destination. */
24245 if (desired_align > 1 && desired_align > align)
24247 rtx saveddest = *destptr;
24249 gcc_assert (desired_align <= size);
24250 /* Align destptr up, place it to new register. */
24251 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24252 GEN_INT (prolog_size),
24253 NULL_RTX, 1, OPTAB_DIRECT);
24254 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24255 REG_POINTER (*destptr) = 1;
24256 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24257 GEN_INT (-desired_align),
24258 *destptr, 1, OPTAB_DIRECT);
24259 /* See how many bytes we skipped. */
24260 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24261 *destptr,
24262 saveddest, 1, OPTAB_DIRECT);
24263 /* Adjust srcptr and count. */
24264 if (!issetmem)
24265 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24266 saveddest, *srcptr, 1, OPTAB_DIRECT);
24267 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24268 saveddest, *count, 1, OPTAB_DIRECT);
24269 /* We copied at most size + prolog_size. */
24270 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24271 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24272 else
24273 *min_size = 0;
24275 /* Our loops always round down the block size, but for dispatch to the library
24276 we need the precise value. */
24277 if (dynamic_check)
24278 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24279 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24281 else
24283 gcc_assert (prolog_size == 0);
24284 /* Decrease count, so we won't end up copying last word twice. */
24285 if (!CONST_INT_P (*count))
24286 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24287 constm1_rtx, *count, 1, OPTAB_DIRECT);
24288 else
24289 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24290 if (*min_size)
24291 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
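/* The small-block strategy described in the pseudocode above relies on
   copying one chunk from each end of the block and letting the two
   copies overlap.  A hypothetical C sketch for the SIZE..2*SIZE-1 byte
   case (not code from this file):

     #include <stddef.h>

     static void copy_small (char *dst, const char *src,
                             size_t count, size_t size)
     {
       // assumes size <= count && count < 2 * size
       for (size_t i = 0; i < size; i++)
         dst[i] = src[i];                               // head chunk
       for (size_t i = 0; i < size; i++)
         dst[count - size + i] = src[count - size + i]; // tail chunk
     }
*/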
24296 /* This function is like the previous one, except here we know how many bytes
24297 need to be copied. That allows us to update alignment not only of DST, which
24298 is returned, but also of SRC, which is passed as a pointer for that
24299 reason. */
24300 static rtx
24301 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24302 rtx srcreg, rtx value, rtx vec_value,
24303 int desired_align, int align_bytes,
24304 bool issetmem)
24306 rtx src = NULL;
24307 rtx orig_dst = dst;
24308 rtx orig_src = NULL;
24309 int piece_size = 1;
24310 int copied_bytes = 0;
24312 if (!issetmem)
24314 gcc_assert (srcp != NULL);
24315 src = *srcp;
24316 orig_src = src;
24319 for (piece_size = 1;
24320 piece_size <= desired_align && copied_bytes < align_bytes;
24321 piece_size <<= 1)
24323 if (align_bytes & piece_size)
24325 if (issetmem)
24327 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24328 dst = emit_memset (dst, destreg, vec_value, piece_size);
24329 else
24330 dst = emit_memset (dst, destreg, value, piece_size);
24332 else
24333 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24334 copied_bytes += piece_size;
24337 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24338 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24339 if (MEM_SIZE_KNOWN_P (orig_dst))
24340 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24342 if (!issetmem)
24344 int src_align_bytes = get_mem_align_offset (src, desired_align
24345 * BITS_PER_UNIT);
24346 if (src_align_bytes >= 0)
24347 src_align_bytes = desired_align - src_align_bytes;
24348 if (src_align_bytes >= 0)
24350 unsigned int src_align;
24351 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24353 if ((src_align_bytes & (src_align - 1))
24354 == (align_bytes & (src_align - 1)))
24355 break;
24357 if (src_align > (unsigned int) desired_align)
24358 src_align = desired_align;
24359 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24360 set_mem_align (src, src_align * BITS_PER_UNIT);
24362 if (MEM_SIZE_KNOWN_P (orig_src))
24363 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24364 *srcp = src;
24367 return dst;
24370 /* Return true if ALG can be used in current context.
24371 Assume we expand memset if MEMSET is true. */
24372 static bool
24373 alg_usable_p (enum stringop_alg alg, bool memset)
24375 if (alg == no_stringop)
24376 return false;
24377 if (alg == vector_loop)
24378 return TARGET_SSE || TARGET_AVX;
24379 /* Algorithms using the rep prefix want at least edi and ecx;
24380 additionally, memset wants eax and memcpy wants esi. Don't
24381 consider such algorithms if the user has appropriated those
24382 registers for their own purposes. */
24383 if (alg == rep_prefix_1_byte
24384 || alg == rep_prefix_4_byte
24385 || alg == rep_prefix_8_byte)
24386 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24387 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24388 return true;
24391 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24392 static enum stringop_alg
24393 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24394 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24395 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24397 const struct stringop_algs * algs;
24398 bool optimize_for_speed;
24399 int max = 0;
24400 const struct processor_costs *cost;
24401 int i;
24402 bool any_alg_usable_p = false;
24404 *noalign = false;
24405 *dynamic_check = -1;
24407 /* Even if the string operation call is cold, we still might spend a lot
24408 of time processing large blocks. */
24409 if (optimize_function_for_size_p (cfun)
24410 || (optimize_insn_for_size_p ()
24411 && (max_size < 256
24412 || (expected_size != -1 && expected_size < 256))))
24413 optimize_for_speed = false;
24414 else
24415 optimize_for_speed = true;
24417 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24418 if (memset)
24419 algs = &cost->memset[TARGET_64BIT != 0];
24420 else
24421 algs = &cost->memcpy[TARGET_64BIT != 0];
24423 /* See maximal size for user defined algorithm. */
24424 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24426 enum stringop_alg candidate = algs->size[i].alg;
24427 bool usable = alg_usable_p (candidate, memset);
24428 any_alg_usable_p |= usable;
24430 if (candidate != libcall && candidate && usable)
24431 max = algs->size[i].max;
24434 /* If expected size is not known but max size is small enough
24435 so that the inline version is a win, set the expected size into
24436 the range. */
24437 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24438 && expected_size == -1)
24439 expected_size = min_size / 2 + max_size / 2;
24441 /* If the user specified the algorithm, honor it if possible. */
24442 if (ix86_stringop_alg != no_stringop
24443 && alg_usable_p (ix86_stringop_alg, memset))
24444 return ix86_stringop_alg;
24445 /* rep; movq or rep; movl is the smallest variant. */
24446 else if (!optimize_for_speed)
24448 *noalign = true;
24449 if (!count || (count & 3) || (memset && !zero_memset))
24450 return alg_usable_p (rep_prefix_1_byte, memset)
24451 ? rep_prefix_1_byte : loop_1_byte;
24452 else
24453 return alg_usable_p (rep_prefix_4_byte, memset)
24454 ? rep_prefix_4_byte : loop;
24456 /* Very tiny blocks are best handled via the loop; REP is expensive to
24457 set up. */
24458 else if (expected_size != -1 && expected_size < 4)
24459 return loop_1_byte;
24460 else if (expected_size != -1)
24462 enum stringop_alg alg = libcall;
24463 bool alg_noalign = false;
24464 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24466 /* We get here if the algorithms that were not libcall-based
24467 were rep-prefix based and we are unable to use rep prefixes
24468 based on global register usage. Break out of the loop and
24469 use the heuristic below. */
24470 if (algs->size[i].max == 0)
24471 break;
24472 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24474 enum stringop_alg candidate = algs->size[i].alg;
24476 if (candidate != libcall && alg_usable_p (candidate, memset))
24478 alg = candidate;
24479 alg_noalign = algs->size[i].noalign;
24481 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24482 last non-libcall inline algorithm. */
24483 if (TARGET_INLINE_ALL_STRINGOPS)
24485 /* When the current size is best to be copied by a libcall,
24486 but we are still forced to inline, run the heuristic below
24487 that will pick code for medium sized blocks. */
24488 if (alg != libcall)
24490 *noalign = alg_noalign;
24491 return alg;
24493 else if (!any_alg_usable_p)
24494 break;
24496 else if (alg_usable_p (candidate, memset))
24498 *noalign = algs->size[i].noalign;
24499 return candidate;
24504 /* When asked to inline the call anyway, try to pick a meaningful choice.
24505 We look for the maximal size of block that is faster to copy by hand and
24506 take blocks of at most that size, guessing that the average size will
24507 be roughly half of the block.
24509 If this turns out to be bad, we might simply specify the preferred
24510 choice in ix86_costs. */
24511 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24512 && (algs->unknown_size == libcall
24513 || !alg_usable_p (algs->unknown_size, memset)))
24515 enum stringop_alg alg;
24517 /* If there aren't any usable algorithms, then recursing on
24518 smaller sizes isn't going to find anything. Just return the
24519 simple byte-at-a-time copy loop. */
24520 if (!any_alg_usable_p)
24522 /* Pick something reasonable. */
24523 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24524 *dynamic_check = 128;
24525 return loop_1_byte;
24527 if (max <= 0)
24528 max = 4096;
24529 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24530 zero_memset, dynamic_check, noalign);
24531 gcc_assert (*dynamic_check == -1);
24532 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24533 *dynamic_check = max;
24534 else
24535 gcc_assert (alg != libcall);
24536 return alg;
24538 return (alg_usable_p (algs->unknown_size, memset)
24539 ? algs->unknown_size : libcall);
24542 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24543 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24544 static int
24545 decide_alignment (int align,
24546 enum stringop_alg alg,
24547 int expected_size,
24548 machine_mode move_mode)
24550 int desired_align = 0;
24552 gcc_assert (alg != no_stringop);
24554 if (alg == libcall)
24555 return 0;
24556 if (move_mode == VOIDmode)
24557 return 0;
24559 desired_align = GET_MODE_SIZE (move_mode);
24560 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24561 copying a whole cacheline at once. */
24562 if (TARGET_PENTIUMPRO
24563 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24564 desired_align = 8;
24566 if (optimize_size)
24567 desired_align = 1;
24568 if (desired_align < align)
24569 desired_align = align;
24570 if (expected_size != -1 && expected_size < 4)
24571 desired_align = align;
24573 return desired_align;
24577 /* Helper function for memset. For the QImode value 0xXY produce
24578 0xXYXYXYXY of the width specified by MODE. This is essentially
24579 VAL * 0x01010101, but we can do slightly better than
24580 synth_mult by unwinding the sequence by hand on CPUs with
24581 slow multiply. */
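/* For example, broadcasting VAL == 0xAB in SImode:
     0xAB * 0x01010101 == 0xABABABAB, or, unwound by hand,
       x  = 0x000000AB
       x |= x << 8;    -> 0x0000ABAB
       x |= x << 16;   -> 0xABABABAB
   and for DImode one more step, x |= x << 32, gives 0xABABABABABABABAB,
   matching the shift-and-IOR sequence emitted below.  */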
24582 static rtx
24583 promote_duplicated_reg (machine_mode mode, rtx val)
24585 machine_mode valmode = GET_MODE (val);
24586 rtx tmp;
24587 int nops = mode == DImode ? 3 : 2;
24589 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24590 if (val == const0_rtx)
24591 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24592 if (CONST_INT_P (val))
24594 HOST_WIDE_INT v = INTVAL (val) & 255;
24596 v |= v << 8;
24597 v |= v << 16;
24598 if (mode == DImode)
24599 v |= (v << 16) << 16;
24600 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24603 if (valmode == VOIDmode)
24604 valmode = QImode;
24605 if (valmode != QImode)
24606 val = gen_lowpart (QImode, val);
24607 if (mode == QImode)
24608 return val;
24609 if (!TARGET_PARTIAL_REG_STALL)
24610 nops--;
24611 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24612 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24613 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24614 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24616 rtx reg = convert_modes (mode, QImode, val, true);
24617 tmp = promote_duplicated_reg (mode, const1_rtx);
24618 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24619 OPTAB_DIRECT);
24621 else
24623 rtx reg = convert_modes (mode, QImode, val, true);
24625 if (!TARGET_PARTIAL_REG_STALL)
24626 if (mode == SImode)
24627 emit_insn (gen_movsi_insv_1 (reg, reg));
24628 else
24629 emit_insn (gen_movdi_insv_1 (reg, reg));
24630 else
24632 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24633 NULL, 1, OPTAB_DIRECT);
24634 reg =
24635 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24637 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24638 NULL, 1, OPTAB_DIRECT);
24639 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24640 if (mode == SImode)
24641 return reg;
24642 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24643 NULL, 1, OPTAB_DIRECT);
24644 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24645 return reg;
24649 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24650 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24651 raising alignment from ALIGN to DESIRED_ALIGN. */
24652 static rtx
24653 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24654 int align)
24656 rtx promoted_val;
24658 if (TARGET_64BIT
24659 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24660 promoted_val = promote_duplicated_reg (DImode, val);
24661 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24662 promoted_val = promote_duplicated_reg (SImode, val);
24663 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24664 promoted_val = promote_duplicated_reg (HImode, val);
24665 else
24666 promoted_val = val;
24668 return promoted_val;
24671 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24672 operations when profitable. The code depends upon architecture, block size
24673 and alignment, but always has one of the following overall structures:
24675 Aligned move sequence:
24677 1) Prologue guard: Conditional that jumps up to epilogues for small
24678 blocks that can be handled by epilogue alone. This is faster
24679 but also needed for correctness, since the prologue assumes the block
24680 is larger than the desired alignment.
24682 Optional dynamic check for size and libcall for large
24683 blocks is emitted here too, with -minline-stringops-dynamically.
24685 2) Prologue: copy first few bytes in order to get destination
24686 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24687 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24688 copied. We emit either a jump tree on power of two sized
24689 blocks, or a byte loop.
24691 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24692 with specified algorithm.
24694 4) Epilogue: code copying tail of the block that is too small to be
24695 handled by main body (or up to size guarded by prologue guard).
24697 Misaligned move sequence
24699 1) misaligned move prologue/epilogue containing:
24700 a) Prologue handling small memory blocks and jumping to done_label
24701 (skipped if blocks are known to be large enough)
24702 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24703 needed, by a single possibly misaligned move
24704 (skipped if alignment is not needed)
24705 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24707 2) Zero size guard dispatching to done_label, if needed
24709 3) Dispatch to library call, if needed.
24711 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24712 with specified algorithm. */
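/* As a rough illustration, using the 64-bit unrolled_loop numbers chosen
   below: move_mode is DImode and unroll_factor is 4, so SIZE_NEEDED is 32.
   The prologue guard then branches straight to the epilogue for blocks
   shorter than about 32 bytes, the prologue stores at most
   DESIRED_ALIGN - ALIGN bytes to align the destination, the main loop
   processes 32 bytes per iteration, and the epilogue finishes the
   remaining tail of fewer than 32 bytes.  */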
24713 bool
24714 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24715 rtx align_exp, rtx expected_align_exp,
24716 rtx expected_size_exp, rtx min_size_exp,
24717 rtx max_size_exp, rtx probable_max_size_exp,
24718 bool issetmem)
24720 rtx destreg;
24721 rtx srcreg = NULL;
24722 rtx_code_label *label = NULL;
24723 rtx tmp;
24724 rtx_code_label *jump_around_label = NULL;
24725 HOST_WIDE_INT align = 1;
24726 unsigned HOST_WIDE_INT count = 0;
24727 HOST_WIDE_INT expected_size = -1;
24728 int size_needed = 0, epilogue_size_needed;
24729 int desired_align = 0, align_bytes = 0;
24730 enum stringop_alg alg;
24731 rtx promoted_val = NULL;
24732 rtx vec_promoted_val = NULL;
24733 bool force_loopy_epilogue = false;
24734 int dynamic_check;
24735 bool need_zero_guard = false;
24736 bool noalign;
24737 machine_mode move_mode = VOIDmode;
24738 int unroll_factor = 1;
24739 /* TODO: Once value ranges are available, fill in proper data. */
24740 unsigned HOST_WIDE_INT min_size = 0;
24741 unsigned HOST_WIDE_INT max_size = -1;
24742 unsigned HOST_WIDE_INT probable_max_size = -1;
24743 bool misaligned_prologue_used = false;
24745 if (CONST_INT_P (align_exp))
24746 align = INTVAL (align_exp);
24747 /* i386 can do misaligned access at a reasonably small extra cost. */
24748 if (CONST_INT_P (expected_align_exp)
24749 && INTVAL (expected_align_exp) > align)
24750 align = INTVAL (expected_align_exp);
24751 /* ALIGN is the minimum of destination and source alignment, but we care here
24752 just about destination alignment. */
24753 else if (!issetmem
24754 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24755 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24757 if (CONST_INT_P (count_exp))
24759 min_size = max_size = probable_max_size = count = expected_size
24760 = INTVAL (count_exp);
24761 /* When COUNT is 0, there is nothing to do. */
24762 if (!count)
24763 return true;
24765 else
24767 if (min_size_exp)
24768 min_size = INTVAL (min_size_exp);
24769 if (max_size_exp)
24770 max_size = INTVAL (max_size_exp);
24771 if (probable_max_size_exp)
24772 probable_max_size = INTVAL (probable_max_size_exp);
24773 if (CONST_INT_P (expected_size_exp))
24774 expected_size = INTVAL (expected_size_exp);
24777 /* Make sure we don't need to care about overflow later on. */
24778 if (count > (HOST_WIDE_INT_1U << 30))
24779 return false;
24781 /* Step 0: Decide on preferred algorithm, desired alignment and
24782 size of chunks to be copied by main loop. */
24783 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24784 issetmem,
24785 issetmem && val_exp == const0_rtx,
24786 &dynamic_check, &noalign);
24787 if (alg == libcall)
24788 return false;
24789 gcc_assert (alg != no_stringop);
24791 /* For now the vector version of memset is generated only for memory zeroing,
24792 as creating the promoted vector value is very cheap in this case. */
24793 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24794 alg = unrolled_loop;
24796 if (!count)
24797 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24798 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24799 if (!issetmem)
24800 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24802 unroll_factor = 1;
24803 move_mode = word_mode;
24804 switch (alg)
24806 case libcall:
24807 case no_stringop:
24808 case last_alg:
24809 gcc_unreachable ();
24810 case loop_1_byte:
24811 need_zero_guard = true;
24812 move_mode = QImode;
24813 break;
24814 case loop:
24815 need_zero_guard = true;
24816 break;
24817 case unrolled_loop:
24818 need_zero_guard = true;
24819 unroll_factor = (TARGET_64BIT ? 4 : 2);
24820 break;
24821 case vector_loop:
24822 need_zero_guard = true;
24823 unroll_factor = 4;
24824 /* Find the widest supported mode. */
24825 move_mode = word_mode;
24826 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24827 != CODE_FOR_nothing)
24828 move_mode = GET_MODE_WIDER_MODE (move_mode);
24830 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24831 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24832 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24834 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24835 move_mode = mode_for_vector (word_mode, nunits);
24836 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24837 move_mode = word_mode;
24839 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24840 break;
24841 case rep_prefix_8_byte:
24842 move_mode = DImode;
24843 break;
24844 case rep_prefix_4_byte:
24845 move_mode = SImode;
24846 break;
24847 case rep_prefix_1_byte:
24848 move_mode = QImode;
24849 break;
24851 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24852 epilogue_size_needed = size_needed;
24854 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24855 if (!TARGET_ALIGN_STRINGOPS || noalign)
24856 align = desired_align;
24858 /* Step 1: Prologue guard. */
24860 /* Alignment code needs count to be in register. */
24861 if (CONST_INT_P (count_exp) && desired_align > align)
24863 if (INTVAL (count_exp) > desired_align
24864 && INTVAL (count_exp) > size_needed)
24866 align_bytes
24867 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24868 if (align_bytes <= 0)
24869 align_bytes = 0;
24870 else
24871 align_bytes = desired_align - align_bytes;
24873 if (align_bytes == 0)
24874 count_exp = force_reg (counter_mode (count_exp), count_exp);
24876 gcc_assert (desired_align >= 1 && align >= 1);
24878 /* Misaligned move sequences handle both prologue and epilogue at once.
24879 Default code generation results in smaller code for large alignments
24880 and also avoids redundant work when sizes are known precisely. */
24881 misaligned_prologue_used
24882 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24883 && MAX (desired_align, epilogue_size_needed) <= 32
24884 && desired_align <= epilogue_size_needed
24885 && ((desired_align > align && !align_bytes)
24886 || (!count && epilogue_size_needed > 1)));
24888 /* Do the cheap promotion to allow better CSE across the
24889 main loop and epilogue (i.e. one load of the big constant in
24890 front of all the code).
24891 For now the misaligned move sequences do not have a fast path
24892 without broadcasting. */
24893 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24895 if (alg == vector_loop)
24897 gcc_assert (val_exp == const0_rtx);
24898 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24899 promoted_val = promote_duplicated_reg_to_size (val_exp,
24900 GET_MODE_SIZE (word_mode),
24901 desired_align, align);
24903 else
24905 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24906 desired_align, align);
24909 /* Misaligned move sequences handle both prologue and epilogue at once.
24910 Default code generation results in smaller code for large alignments and
24911 also avoids redundant work when sizes are known precisely. */
24912 if (misaligned_prologue_used)
24914 /* The misaligned move prologue has handled small blocks by itself. */
24915 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24916 (dst, src, &destreg, &srcreg,
24917 move_mode, promoted_val, vec_promoted_val,
24918 &count_exp,
24919 &jump_around_label,
24920 desired_align < align
24921 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24922 desired_align, align, &min_size, dynamic_check, issetmem);
24923 if (!issetmem)
24924 src = change_address (src, BLKmode, srcreg);
24925 dst = change_address (dst, BLKmode, destreg);
24926 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24927 epilogue_size_needed = 0;
24928 if (need_zero_guard && !min_size)
24930 /* It is possible that we copied enough so the main loop will not
24931 execute. */
24932 gcc_assert (size_needed > 1);
24933 if (jump_around_label == NULL_RTX)
24934 jump_around_label = gen_label_rtx ();
24935 emit_cmp_and_jump_insns (count_exp,
24936 GEN_INT (size_needed),
24937 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24938 if (expected_size == -1
24939 || expected_size < (desired_align - align) / 2 + size_needed)
24940 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24941 else
24942 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24945 /* Ensure that alignment prologue won't copy past end of block. */
24946 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24948 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24949 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24950 Make sure it is power of 2. */
24951 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24953 /* To improve performance of small blocks, we jump around the VAL
24954 promotion. This means that if the promoted VAL is not constant,
24955 we might not use it in the epilogue and have to use the byte
24956 loop variant. */
24957 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24958 force_loopy_epilogue = true;
24959 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24960 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24962 /* If main algorithm works on QImode, no epilogue is needed.
24963 For small sizes just don't align anything. */
24964 if (size_needed == 1)
24965 desired_align = align;
24966 else
24967 goto epilogue;
24969 else if (!count
24970 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24972 label = gen_label_rtx ();
24973 emit_cmp_and_jump_insns (count_exp,
24974 GEN_INT (epilogue_size_needed),
24975 LTU, 0, counter_mode (count_exp), 1, label);
24976 if (expected_size == -1 || expected_size < epilogue_size_needed)
24977 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24978 else
24979 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24983 /* Emit code to decide at runtime whether a library call or inline code should
24984 be used. */
24985 if (dynamic_check != -1)
24987 if (!issetmem && CONST_INT_P (count_exp))
24989 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24991 emit_block_move_via_libcall (dst, src, count_exp, false);
24992 count_exp = const0_rtx;
24993 goto epilogue;
24996 else
24998 rtx_code_label *hot_label = gen_label_rtx ();
24999 if (jump_around_label == NULL_RTX)
25000 jump_around_label = gen_label_rtx ();
25001 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25002 LEU, 0, counter_mode (count_exp),
25003 1, hot_label);
25004 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25005 if (issetmem)
25006 set_storage_via_libcall (dst, count_exp, val_exp, false);
25007 else
25008 emit_block_move_via_libcall (dst, src, count_exp, false);
25009 emit_jump (jump_around_label);
25010 emit_label (hot_label);
25014 /* Step 2: Alignment prologue. */
25015 /* Do the expensive promotion once we branched off the small blocks. */
25016 if (issetmem && !promoted_val)
25017 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25018 desired_align, align);
25020 if (desired_align > align && !misaligned_prologue_used)
25022 if (align_bytes == 0)
25024 /* Except for the first move in the prologue, we no longer know
25025 the constant offset in aliasing info. It doesn't seem worth
25026 the pain to maintain it for the first move, so throw away
25027 the info early. */
25028 dst = change_address (dst, BLKmode, destreg);
25029 if (!issetmem)
25030 src = change_address (src, BLKmode, srcreg);
25031 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25032 promoted_val, vec_promoted_val,
25033 count_exp, align, desired_align,
25034 issetmem);
25035 /* At most desired_align - align bytes are copied. */
25036 if (min_size < (unsigned)(desired_align - align))
25037 min_size = 0;
25038 else
25039 min_size -= desired_align - align;
25041 else
25043 /* If we know how many bytes need to be stored before dst is
25044 sufficiently aligned, maintain aliasing info accurately. */
25045 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25046 srcreg,
25047 promoted_val,
25048 vec_promoted_val,
25049 desired_align,
25050 align_bytes,
25051 issetmem);
25053 count_exp = plus_constant (counter_mode (count_exp),
25054 count_exp, -align_bytes);
25055 count -= align_bytes;
25056 min_size -= align_bytes;
25057 max_size -= align_bytes;
25059 if (need_zero_guard
25060 && !min_size
25061 && (count < (unsigned HOST_WIDE_INT) size_needed
25062 || (align_bytes == 0
25063 && count < ((unsigned HOST_WIDE_INT) size_needed
25064 + desired_align - align))))
25066 /* It is possible that we copied enough so the main loop will not
25067 execute. */
25068 gcc_assert (size_needed > 1);
25069 if (label == NULL_RTX)
25070 label = gen_label_rtx ();
25071 emit_cmp_and_jump_insns (count_exp,
25072 GEN_INT (size_needed),
25073 LTU, 0, counter_mode (count_exp), 1, label);
25074 if (expected_size == -1
25075 || expected_size < (desired_align - align) / 2 + size_needed)
25076 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25077 else
25078 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25081 if (label && size_needed == 1)
25083 emit_label (label);
25084 LABEL_NUSES (label) = 1;
25085 label = NULL;
25086 epilogue_size_needed = 1;
25087 if (issetmem)
25088 promoted_val = val_exp;
25090 else if (label == NULL_RTX && !misaligned_prologue_used)
25091 epilogue_size_needed = size_needed;
25093 /* Step 3: Main loop. */
25095 switch (alg)
25097 case libcall:
25098 case no_stringop:
25099 case last_alg:
25100 gcc_unreachable ();
25101 case loop_1_byte:
25102 case loop:
25103 case unrolled_loop:
25104 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25105 count_exp, move_mode, unroll_factor,
25106 expected_size, issetmem);
25107 break;
25108 case vector_loop:
25109 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25110 vec_promoted_val, count_exp, move_mode,
25111 unroll_factor, expected_size, issetmem);
25112 break;
25113 case rep_prefix_8_byte:
25114 case rep_prefix_4_byte:
25115 case rep_prefix_1_byte:
25116 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25117 val_exp, count_exp, move_mode, issetmem);
25118 break;
25120 /* Adjust properly the offset of src and dest memory for aliasing. */
25121 if (CONST_INT_P (count_exp))
25123 if (!issetmem)
25124 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25125 (count / size_needed) * size_needed);
25126 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25127 (count / size_needed) * size_needed);
25129 else
25131 if (!issetmem)
25132 src = change_address (src, BLKmode, srcreg);
25133 dst = change_address (dst, BLKmode, destreg);
25136 /* Step 4: Epilogue to copy the remaining bytes. */
25137 epilogue:
25138 if (label)
25140 /* When the main loop is done, COUNT_EXP might hold the original count,
25141 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25142 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25143 bytes. Compensate if needed. */
25145 if (size_needed < epilogue_size_needed)
25147 tmp =
25148 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25149 GEN_INT (size_needed - 1), count_exp, 1,
25150 OPTAB_DIRECT);
25151 if (tmp != count_exp)
25152 emit_move_insn (count_exp, tmp);
25154 emit_label (label);
25155 LABEL_NUSES (label) = 1;
25158 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25160 if (force_loopy_epilogue)
25161 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25162 epilogue_size_needed);
25163 else
25165 if (issetmem)
25166 expand_setmem_epilogue (dst, destreg, promoted_val,
25167 vec_promoted_val, count_exp,
25168 epilogue_size_needed);
25169 else
25170 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25171 epilogue_size_needed);
25174 if (jump_around_label)
25175 emit_label (jump_around_label);
25176 return true;
25180 /* Expand the appropriate insns for doing strlen if not just doing
25181 repnz; scasb
25183 out = result, initialized with the start address
25184 align_rtx = alignment of the address.
25185 scratch = scratch register, initialized with the start address when
25186 not aligned, otherwise undefined
25188 This is just the body. It needs the initializations mentioned above and
25189 some address computing at the end. These things are done in i386.md. */
25191 static void
25192 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25194 int align;
25195 rtx tmp;
25196 rtx_code_label *align_2_label = NULL;
25197 rtx_code_label *align_3_label = NULL;
25198 rtx_code_label *align_4_label = gen_label_rtx ();
25199 rtx_code_label *end_0_label = gen_label_rtx ();
25200 rtx mem;
25201 rtx tmpreg = gen_reg_rtx (SImode);
25202 rtx scratch = gen_reg_rtx (SImode);
25203 rtx cmp;
25205 align = 0;
25206 if (CONST_INT_P (align_rtx))
25207 align = INTVAL (align_rtx);
25209 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25211 /* Is there a known alignment and is it less than 4? */
25212 if (align < 4)
25214 rtx scratch1 = gen_reg_rtx (Pmode);
25215 emit_move_insn (scratch1, out);
25216 /* Is there a known alignment and is it not 2? */
25217 if (align != 2)
25219 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25220 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25222 /* Leave just the 3 lower bits. */
25223 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25224 NULL_RTX, 0, OPTAB_WIDEN);
25226 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25227 Pmode, 1, align_4_label);
25228 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25229 Pmode, 1, align_2_label);
25230 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25231 Pmode, 1, align_3_label);
25233 else
25235 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25236 check whether it is aligned to a 4-byte boundary. */
25238 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25239 NULL_RTX, 0, OPTAB_WIDEN);
25241 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25242 Pmode, 1, align_4_label);
25245 mem = change_address (src, QImode, out);
25247 /* Now compare the bytes. */
25249 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25250 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25251 QImode, 1, end_0_label);
25253 /* Increment the address. */
25254 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25256 /* Not needed with an alignment of 2 */
25257 if (align != 2)
25259 emit_label (align_2_label);
25261 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25262 end_0_label);
25264 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25266 emit_label (align_3_label);
25269 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25270 end_0_label);
25272 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25275 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25276 align this loop; it only bloats the program and does not help
25277 speed. */
25278 emit_label (align_4_label);
25280 mem = change_address (src, SImode, out);
25281 emit_move_insn (scratch, mem);
25282 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25284 /* This formula yields a nonzero result iff one of the bytes is zero.
25285 This saves three branches inside the loop and many cycles. */
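/* Concretely, the insns below compute
     (x - 0x01010101) & ~x & 0x80808080.
   For x == 0x11002233 (a zero byte present):
     x - 0x01010101 == 0x0FFF2132,  ~x == 0xEEFFDDCC,
     AND of the two == 0x0EFF0100,  masked with 0x80808080 == 0x00800000,
   so the result is nonzero and the surviving 0x80 bit marks the zero byte.
   For x == 0x11223344 (no zero byte) the same computation yields 0.  */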
25287 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25288 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25289 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25290 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25291 gen_int_mode (0x80808080, SImode)));
25292 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25293 align_4_label);
25295 if (TARGET_CMOVE)
25297 rtx reg = gen_reg_rtx (SImode);
25298 rtx reg2 = gen_reg_rtx (Pmode);
25299 emit_move_insn (reg, tmpreg);
25300 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25302 /* If zero is not in the first two bytes, move two bytes forward. */
25303 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25304 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25305 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25306 emit_insn (gen_rtx_SET (tmpreg,
25307 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25308 reg,
25309 tmpreg)));
25310 /* Emit lea manually to avoid clobbering of flags. */
25311 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25313 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25314 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25315 emit_insn (gen_rtx_SET (out,
25316 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25317 reg2,
25318 out)));
25320 else
25322 rtx_code_label *end_2_label = gen_label_rtx ();
25323 /* Is zero in the first two bytes? */
25325 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25326 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25327 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25328 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25329 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25330 pc_rtx);
25331 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25332 JUMP_LABEL (tmp) = end_2_label;
25334 /* Not in the first two. Move two bytes forward. */
25335 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25336 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25338 emit_label (end_2_label);
25342 /* Avoid branch in fixing the byte. */
25343 tmpreg = gen_lowpart (QImode, tmpreg);
25344 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25345 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25346 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25347 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25349 emit_label (end_0_label);
25352 /* Expand strlen. */
25354 bool
25355 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25357 rtx addr, scratch1, scratch2, scratch3, scratch4;
25359 /* The generic case of the strlen expander is long. Avoid its
25360 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
25362 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25363 && !TARGET_INLINE_ALL_STRINGOPS
25364 && !optimize_insn_for_size_p ()
25365 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25366 return false;
25368 addr = force_reg (Pmode, XEXP (src, 0));
25369 scratch1 = gen_reg_rtx (Pmode);
25371 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25372 && !optimize_insn_for_size_p ())
25374 /* Well it seems that some optimizer does not combine a call like
25375 foo(strlen(bar), strlen(bar));
25376 when the move and the subtraction are done here. It does calculate
25377 the length just once when these instructions are done inside of
25378 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25379 often used and I use one fewer register for the lifetime of
25380 output_strlen_unroll() this is better. */
25382 emit_move_insn (out, addr);
25384 ix86_expand_strlensi_unroll_1 (out, src, align);
25386 /* strlensi_unroll_1 returns the address of the zero at the end of
25387 the string, like memchr(), so compute the length by subtracting
25388 the start address. */
25389 emit_insn (ix86_gen_sub3 (out, out, addr));
25391 else
25393 rtx unspec;
25395 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25396 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25397 return false;
25399 scratch2 = gen_reg_rtx (Pmode);
25400 scratch3 = gen_reg_rtx (Pmode);
25401 scratch4 = force_reg (Pmode, constm1_rtx);
25403 emit_move_insn (scratch3, addr);
25404 eoschar = force_reg (QImode, eoschar);
25406 src = replace_equiv_address_nv (src, scratch3);
25408 /* If .md starts supporting :P, this can be done in .md. */
25409 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25410 scratch4), UNSPEC_SCAS);
25411 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25412 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25413 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25415 return true;
25418 /* For a given symbol (function), construct code to compute the address of its
25419 PLT entry in the large x86-64 PIC model. */
25420 static rtx
25421 construct_plt_address (rtx symbol)
25423 rtx tmp, unspec;
25425 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25426 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25427 gcc_assert (Pmode == DImode);
25429 tmp = gen_reg_rtx (Pmode);
25430 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25432 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25433 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25434 return tmp;
25438 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25439 rtx callarg2,
25440 rtx pop, bool sibcall)
25442 rtx vec[3];
25443 rtx use = NULL, call;
25444 unsigned int vec_len = 0;
25446 if (pop == const0_rtx)
25447 pop = NULL;
25448 gcc_assert (!TARGET_64BIT || !pop);
25450 if (TARGET_MACHO && !TARGET_64BIT)
25452 #if TARGET_MACHO
25453 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25454 fnaddr = machopic_indirect_call_target (fnaddr);
25455 #endif
25457 else
25459 /* Static functions and indirect calls don't need the pic register. */
25460 if (flag_pic
25461 && (!TARGET_64BIT
25462 || (ix86_cmodel == CM_LARGE_PIC
25463 && DEFAULT_ABI != MS_ABI))
25464 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25465 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25467 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25468 if (ix86_use_pseudo_pic_reg ())
25469 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25470 pic_offset_table_rtx);
25474 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25475 parameters passed in vector registers. */
25476 if (TARGET_64BIT
25477 && (INTVAL (callarg2) > 0
25478 || (INTVAL (callarg2) == 0
25479 && (TARGET_SSE || !flag_skip_rax_setup))))
25481 rtx al = gen_rtx_REG (QImode, AX_REG);
25482 emit_move_insn (al, callarg2);
25483 use_reg (&use, al);
25486 if (ix86_cmodel == CM_LARGE_PIC
25487 && !TARGET_PECOFF
25488 && MEM_P (fnaddr)
25489 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25490 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25491 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25492 else if (sibcall
25493 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25494 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25496 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25497 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25500 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25502 if (retval)
25504 /* We should add bounds as a destination register in case
25505 a pointer with bounds may be returned. */
25506 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25508 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25509 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25510 if (GET_CODE (retval) == PARALLEL)
25512 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25513 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25514 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25515 retval = chkp_join_splitted_slot (retval, par);
25517 else
25519 retval = gen_rtx_PARALLEL (VOIDmode,
25520 gen_rtvec (3, retval, b0, b1));
25521 chkp_put_regs_to_expr_list (retval);
25525 call = gen_rtx_SET (retval, call);
25527 vec[vec_len++] = call;
25529 if (pop)
25531 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25532 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25533 vec[vec_len++] = pop;
25536 if (TARGET_64BIT_MS_ABI
25537 && (!callarg2 || INTVAL (callarg2) != -2))
25539 int const cregs_size
25540 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25541 int i;
25543 for (i = 0; i < cregs_size; i++)
25545 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25546 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25548 clobber_reg (&use, gen_rtx_REG (mode, regno));
25552 if (vec_len > 1)
25553 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25554 call = emit_call_insn (call);
25555 if (use)
25556 CALL_INSN_FUNCTION_USAGE (call) = use;
25558 return call;
25561 /* Output the assembly for a call instruction. */
25563 const char *
25564 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25566 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25567 bool seh_nop_p = false;
25568 const char *xasm;
25570 if (SIBLING_CALL_P (insn))
25572 if (direct_p)
25573 xasm = "%!jmp\t%P0";
25574 /* SEH epilogue detection requires the indirect branch case
25575 to include REX.W. */
25576 else if (TARGET_SEH)
25577 xasm = "%!rex.W jmp %A0";
25578 else
25579 xasm = "%!jmp\t%A0";
25581 output_asm_insn (xasm, &call_op);
25582 return "";
25585 /* SEH unwinding can require an extra nop to be emitted in several
25586 circumstances. Determine if we have one of those. */
25587 if (TARGET_SEH)
25589 rtx_insn *i;
25591 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25593 /* If we get to another real insn, we don't need the nop. */
25594 if (INSN_P (i))
25595 break;
25597 /* If we get to the epilogue note, prevent a catch region from
25598 being adjacent to the standard epilogue sequence. If non-
25599 call-exceptions, we'll have done this during epilogue emission. */
25600 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25601 && !flag_non_call_exceptions
25602 && !can_throw_internal (insn))
25604 seh_nop_p = true;
25605 break;
25609 /* If we didn't find a real insn following the call, prevent the
25610 unwinder from looking into the next function. */
25611 if (i == NULL)
25612 seh_nop_p = true;
25615 if (direct_p)
25616 xasm = "%!call\t%P0";
25617 else
25618 xasm = "%!call\t%A0";
25620 output_asm_insn (xasm, &call_op);
25622 if (seh_nop_p)
25623 return "nop";
25625 return "";
25628 /* Clear stack slot assignments remembered from previous functions.
25629 This is called from INIT_EXPANDERS once before RTL is emitted for each
25630 function. */
25632 static struct machine_function *
25633 ix86_init_machine_status (void)
25635 struct machine_function *f;
25637 f = ggc_cleared_alloc<machine_function> ();
25638 f->use_fast_prologue_epilogue_nregs = -1;
25639 f->call_abi = ix86_abi;
25641 return f;
25644 /* Return a MEM corresponding to a stack slot with mode MODE.
25645 Allocate a new slot if necessary.
25647 The RTL for a function can have several slots available: N is
25648 which slot to use. */
25651 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25653 struct stack_local_entry *s;
25655 gcc_assert (n < MAX_386_STACK_LOCALS);
25657 for (s = ix86_stack_locals; s; s = s->next)
25658 if (s->mode == mode && s->n == n)
25659 return validize_mem (copy_rtx (s->rtl));
25661 s = ggc_alloc<stack_local_entry> ();
25662 s->n = n;
25663 s->mode = mode;
25664 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25666 s->next = ix86_stack_locals;
25667 ix86_stack_locals = s;
25668 return validize_mem (copy_rtx (s->rtl));
25671 static void
25672 ix86_instantiate_decls (void)
25674 struct stack_local_entry *s;
25676 for (s = ix86_stack_locals; s; s = s->next)
25677 if (s->rtl != NULL_RTX)
25678 instantiate_decl_rtl (s->rtl);
25681 /* Check whether x86 address PARTS is a pc-relative address. */
25683 static bool
25684 rip_relative_addr_p (struct ix86_address *parts)
25686 rtx base, index, disp;
25688 base = parts->base;
25689 index = parts->index;
25690 disp = parts->disp;
25692 if (disp && !base && !index)
25694 if (TARGET_64BIT)
25696 rtx symbol = disp;
25698 if (GET_CODE (disp) == CONST)
25699 symbol = XEXP (disp, 0);
25700 if (GET_CODE (symbol) == PLUS
25701 && CONST_INT_P (XEXP (symbol, 1)))
25702 symbol = XEXP (symbol, 0);
25704 if (GET_CODE (symbol) == LABEL_REF
25705 || (GET_CODE (symbol) == SYMBOL_REF
25706 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25707 || (GET_CODE (symbol) == UNSPEC
25708 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25709 || XINT (symbol, 1) == UNSPEC_PCREL
25710 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25711 return true;
25714 return false;
25717 /* Calculate the length of the memory address in the instruction encoding.
25718 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25719 or other prefixes. We never generate addr32 prefix for LEA insn. */
25722 memory_address_length (rtx addr, bool lea)
25724 struct ix86_address parts;
25725 rtx base, index, disp;
25726 int len;
25727 int ok;
25729 if (GET_CODE (addr) == PRE_DEC
25730 || GET_CODE (addr) == POST_INC
25731 || GET_CODE (addr) == PRE_MODIFY
25732 || GET_CODE (addr) == POST_MODIFY)
25733 return 0;
25735 ok = ix86_decompose_address (addr, &parts);
25736 gcc_assert (ok);
25738 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25740 /* If this is not LEA instruction, add the length of addr32 prefix. */
25741 if (TARGET_64BIT && !lea
25742 && (SImode_address_operand (addr, VOIDmode)
25743 || (parts.base && GET_MODE (parts.base) == SImode)
25744 || (parts.index && GET_MODE (parts.index) == SImode)))
25745 len++;
25747 base = parts.base;
25748 index = parts.index;
25749 disp = parts.disp;
25751 if (base && GET_CODE (base) == SUBREG)
25752 base = SUBREG_REG (base);
25753 if (index && GET_CODE (index) == SUBREG)
25754 index = SUBREG_REG (index);
25756 gcc_assert (base == NULL_RTX || REG_P (base));
25757 gcc_assert (index == NULL_RTX || REG_P (index));
25759 /* Rule of thumb:
25760 - esp as the base always wants an index,
25761 - ebp as the base always wants a displacement,
25762 - r12 as the base always wants an index,
25763 - r13 as the base always wants a displacement. */
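/* In encoding terms: with mod == 00, r/m == 100 (esp/r12) forces a SIB
   byte, and r/m == 101 (ebp/r13) means disp32 (RIP-relative in 64-bit
   mode), so ebp/r13 as a plain base must instead use mod == 01 with a
   zero disp8.  Hence, for example, (%eax) needs only the one-byte modrm,
   while (%esp) and (%ebp) each need one extra byte.  */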
25765 /* Register Indirect. */
25766 if (base && !index && !disp)
25768 /* esp (for its index) and ebp (for its displacement) need
25769 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25770 code. */
25771 if (base == arg_pointer_rtx
25772 || base == frame_pointer_rtx
25773 || REGNO (base) == SP_REG
25774 || REGNO (base) == BP_REG
25775 || REGNO (base) == R12_REG
25776 || REGNO (base) == R13_REG)
25777 len++;
25780 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25781 is not disp32, but disp32(%rip), so for disp32
25782 SIB byte is needed, unless print_operand_address
25783 optimizes it into disp32(%rip) or (%rip) is implied
25784 by UNSPEC. */
25785 else if (disp && !base && !index)
25787 len += 4;
25788 if (rip_relative_addr_p (&parts))
25789 len++;
25791 else
25793 /* Find the length of the displacement constant. */
25794 if (disp)
25796 if (base && satisfies_constraint_K (disp))
25797 len += 1;
25798 else
25799 len += 4;
25801 /* ebp always wants a displacement. Similarly r13. */
25802 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25803 len++;
25805 /* An index requires the two-byte modrm form.... */
25806 if (index
25807 /* ...like esp (or r12), which always wants an index. */
25808 || base == arg_pointer_rtx
25809 || base == frame_pointer_rtx
25810 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25811 len++;
25814 return len;
25817 /* Compute the default value for the "length_immediate" attribute. When
25818 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
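/* For example, addl $5, %ebx can use the sign-extended 8-bit immediate
   form, so its immediate contributes 1 byte, whereas addl $300, %ebx needs
   the full 32-bit immediate and contributes 4 bytes; this mirrors the
   IN_RANGE (ival, -128, 127) test below.  */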
25820 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25822 int len = 0;
25823 int i;
25824 extract_insn_cached (insn);
25825 for (i = recog_data.n_operands - 1; i >= 0; --i)
25826 if (CONSTANT_P (recog_data.operand[i]))
25828 enum attr_mode mode = get_attr_mode (insn);
25830 gcc_assert (!len);
25831 if (shortform && CONST_INT_P (recog_data.operand[i]))
25833 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25834 switch (mode)
25836 case MODE_QI:
25837 len = 1;
25838 continue;
25839 case MODE_HI:
25840 ival = trunc_int_for_mode (ival, HImode);
25841 break;
25842 case MODE_SI:
25843 ival = trunc_int_for_mode (ival, SImode);
25844 break;
25845 default:
25846 break;
25848 if (IN_RANGE (ival, -128, 127))
25850 len = 1;
25851 continue;
25854 switch (mode)
25856 case MODE_QI:
25857 len = 1;
25858 break;
25859 case MODE_HI:
25860 len = 2;
25861 break;
25862 case MODE_SI:
25863 len = 4;
25864 break;
25865 /* Immediates for DImode instructions are encoded
25866 as 32-bit sign-extended values. */
25867 case MODE_DI:
25868 len = 4;
25869 break;
25870 default:
25871 fatal_insn ("unknown insn mode", insn);
25874 return len;
25877 /* Compute default value for "length_address" attribute. */
25879 ix86_attr_length_address_default (rtx_insn *insn)
25881 int i;
25883 if (get_attr_type (insn) == TYPE_LEA)
25885 rtx set = PATTERN (insn), addr;
25887 if (GET_CODE (set) == PARALLEL)
25888 set = XVECEXP (set, 0, 0);
25890 gcc_assert (GET_CODE (set) == SET);
25892 addr = SET_SRC (set);
25894 return memory_address_length (addr, true);
25897 extract_insn_cached (insn);
25898 for (i = recog_data.n_operands - 1; i >= 0; --i)
25899 if (MEM_P (recog_data.operand[i]))
25901 constrain_operands_cached (insn, reload_completed);
25902 if (which_alternative != -1)
25904 const char *constraints = recog_data.constraints[i];
25905 int alt = which_alternative;
25907 while (*constraints == '=' || *constraints == '+')
25908 constraints++;
25909 while (alt-- > 0)
25910 while (*constraints++ != ',')
25912 /* Skip ignored operands. */
25913 if (*constraints == 'X')
25914 continue;
25916 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25918 return 0;
25921 /* Compute the default value for the "length_vex" attribute. It includes
25922 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25925 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25926 bool has_vex_w)
25928 int i;
25930 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX W bit
25931 requires the 3-byte VEX prefix. */
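/* In other words, the 2-byte form is C5 xx and the 3-byte form is
   C4 xx xx, so including the opcode byte the lengths returned below are
   2 + 1 == 3 and 3 + 1 == 4.  */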
25932 if (!has_0f_opcode || has_vex_w)
25933 return 3 + 1;
25935 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25936 if (!TARGET_64BIT)
25937 return 2 + 1;
25939 extract_insn_cached (insn);
25941 for (i = recog_data.n_operands - 1; i >= 0; --i)
25942 if (REG_P (recog_data.operand[i]))
25944 /* REX.W bit uses 3 byte VEX prefix. */
25945 if (GET_MODE (recog_data.operand[i]) == DImode
25946 && GENERAL_REG_P (recog_data.operand[i]))
25947 return 3 + 1;
25949 else
25951 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25952 if (MEM_P (recog_data.operand[i])
25953 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25954 return 3 + 1;
25957 return 2 + 1;
25960 /* Return the maximum number of instructions a cpu can issue. */
25962 static int
25963 ix86_issue_rate (void)
25965 switch (ix86_tune)
25967 case PROCESSOR_PENTIUM:
25968 case PROCESSOR_BONNELL:
25969 case PROCESSOR_SILVERMONT:
25970 case PROCESSOR_KNL:
25971 case PROCESSOR_INTEL:
25972 case PROCESSOR_K6:
25973 case PROCESSOR_BTVER2:
25974 case PROCESSOR_PENTIUM4:
25975 case PROCESSOR_NOCONA:
25976 return 2;
25978 case PROCESSOR_PENTIUMPRO:
25979 case PROCESSOR_ATHLON:
25980 case PROCESSOR_K8:
25981 case PROCESSOR_AMDFAM10:
25982 case PROCESSOR_GENERIC:
25983 case PROCESSOR_BTVER1:
25984 return 3;
25986 case PROCESSOR_BDVER1:
25987 case PROCESSOR_BDVER2:
25988 case PROCESSOR_BDVER3:
25989 case PROCESSOR_BDVER4:
25990 case PROCESSOR_CORE2:
25991 case PROCESSOR_NEHALEM:
25992 case PROCESSOR_SANDYBRIDGE:
25993 case PROCESSOR_HASWELL:
25994 return 4;
25996 default:
25997 return 1;
26001 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26002 by DEP_INSN and nothing else set by DEP_INSN. */
26004 static bool
26005 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26007 rtx set, set2;
26009 /* Simplify the test for uninteresting insns. */
26010 if (insn_type != TYPE_SETCC
26011 && insn_type != TYPE_ICMOV
26012 && insn_type != TYPE_FCMOV
26013 && insn_type != TYPE_IBR)
26014 return false;
26016 if ((set = single_set (dep_insn)) != 0)
26018 set = SET_DEST (set);
26019 set2 = NULL_RTX;
26021 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26022 && XVECLEN (PATTERN (dep_insn), 0) == 2
26023 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26024 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26026 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26027 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26029 else
26030 return false;
26032 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26033 return false;
26035 /* This test is true if the dependent insn reads the flags but
26036 not any other potentially set register. */
26037 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26038 return false;
26040 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26041 return false;
26043 return true;
26046 /* Return true iff USE_INSN has a memory address with operands set by
26047 SET_INSN. */
26049 bool
26050 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26052 int i;
26053 extract_insn_cached (use_insn);
26054 for (i = recog_data.n_operands - 1; i >= 0; --i)
26055 if (MEM_P (recog_data.operand[i]))
26057 rtx addr = XEXP (recog_data.operand[i], 0);
26058 return modified_in_p (addr, set_insn) != 0;
26060 return false;
26063 /* Helper function for exact_store_load_dependency.
26064 Return true if addr is found in insn. */
26065 static bool
26066 exact_dependency_1 (rtx addr, rtx insn)
26068 enum rtx_code code;
26069 const char *format_ptr;
26070 int i, j;
26072 code = GET_CODE (insn);
26073 switch (code)
26075 case MEM:
26076 if (rtx_equal_p (addr, insn))
26077 return true;
26078 break;
26079 case REG:
26080 CASE_CONST_ANY:
26081 case SYMBOL_REF:
26082 case CODE_LABEL:
26083 case PC:
26084 case CC0:
26085 case EXPR_LIST:
26086 return false;
26087 default:
26088 break;
26091 format_ptr = GET_RTX_FORMAT (code);
26092 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26094 switch (*format_ptr++)
26096 case 'e':
26097 if (exact_dependency_1 (addr, XEXP (insn, i)))
26098 return true;
26099 break;
26100 case 'E':
26101 for (j = 0; j < XVECLEN (insn, i); j++)
26102 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26103 return true;
26104 break;
26107 return false;
26110 /* Return true if there exists an exact dependency between store and load, i.e.
26111 the same memory address is used in both. */
26112 static bool
26113 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26115 rtx set1, set2;
26117 set1 = single_set (store);
26118 if (!set1)
26119 return false;
26120 if (!MEM_P (SET_DEST (set1)))
26121 return false;
26122 set2 = single_set (load);
26123 if (!set2)
26124 return false;
26125 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26126 return true;
26127 return false;
26130 static int
26131 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26133 enum attr_type insn_type, dep_insn_type;
26134 enum attr_memory memory;
26135 rtx set, set2;
26136 int dep_insn_code_number;
26138 /* Anti and output dependencies have zero cost on all CPUs. */
26139 if (REG_NOTE_KIND (link) != 0)
26140 return 0;
26142 dep_insn_code_number = recog_memoized (dep_insn);
26144 /* If we can't recognize the insns, we can't really do anything. */
26145 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26146 return cost;
26148 insn_type = get_attr_type (insn);
26149 dep_insn_type = get_attr_type (dep_insn);
26151 switch (ix86_tune)
26153 case PROCESSOR_PENTIUM:
26154 /* Address Generation Interlock adds a cycle of latency. */
26155 if (insn_type == TYPE_LEA)
26157 rtx addr = PATTERN (insn);
26159 if (GET_CODE (addr) == PARALLEL)
26160 addr = XVECEXP (addr, 0, 0);
26162 gcc_assert (GET_CODE (addr) == SET);
26164 addr = SET_SRC (addr);
26165 if (modified_in_p (addr, dep_insn))
26166 cost += 1;
26168 else if (ix86_agi_dependent (dep_insn, insn))
26169 cost += 1;
26171 /* ??? Compares pair with jump/setcc. */
26172 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26173 cost = 0;
26175 /* Floating point stores require value to be ready one cycle earlier. */
26176 if (insn_type == TYPE_FMOV
26177 && get_attr_memory (insn) == MEMORY_STORE
26178 && !ix86_agi_dependent (dep_insn, insn))
26179 cost += 1;
26180 break;
26182 case PROCESSOR_PENTIUMPRO:
26183 /* INT->FP conversion is expensive. */
26184 if (get_attr_fp_int_src (dep_insn))
26185 cost += 5;
26187 /* There is one cycle extra latency between an FP op and a store. */
26188 if (insn_type == TYPE_FMOV
26189 && (set = single_set (dep_insn)) != NULL_RTX
26190 && (set2 = single_set (insn)) != NULL_RTX
26191 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26192 && MEM_P (SET_DEST (set2)))
26193 cost += 1;
26195 memory = get_attr_memory (insn);
26197 /* Model the ability of the reorder buffer to hide the latency of a load
26198 by executing it in parallel with the previous instruction when the
26199 previous instruction is not needed to compute the address. */
26200 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26201 && !ix86_agi_dependent (dep_insn, insn))
26203 /* Claim that moves take one cycle, as the core can issue one load
26204 at a time and the next load can start a cycle later. */
26205 if (dep_insn_type == TYPE_IMOV
26206 || dep_insn_type == TYPE_FMOV)
26207 cost = 1;
26208 else if (cost > 1)
26209 cost--;
26211 break;
26213 case PROCESSOR_K6:
26214 /* The esp dependency is resolved before
26215 the instruction is really finished. */
26216 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26217 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26218 return 1;
26220 /* INT->FP conversion is expensive. */
26221 if (get_attr_fp_int_src (dep_insn))
26222 cost += 5;
26224 memory = get_attr_memory (insn);
26226 /* Model the ability of the reorder buffer to hide the latency of a load
26227 by executing it in parallel with the previous instruction when the
26228 previous instruction is not needed to compute the address. */
26229 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26230 && !ix86_agi_dependent (dep_insn, insn))
26232 /* Claim that moves take one cycle, as the core can issue one load
26233 at a time and the next load can start a cycle later. */
26234 if (dep_insn_type == TYPE_IMOV
26235 || dep_insn_type == TYPE_FMOV)
26236 cost = 1;
26237 else if (cost > 2)
26238 cost -= 2;
26239 else
26240 cost = 1;
26242 break;
26244 case PROCESSOR_AMDFAM10:
26245 case PROCESSOR_BDVER1:
26246 case PROCESSOR_BDVER2:
26247 case PROCESSOR_BDVER3:
26248 case PROCESSOR_BDVER4:
26249 case PROCESSOR_BTVER1:
26250 case PROCESSOR_BTVER2:
26251 case PROCESSOR_GENERIC:
26252 /* The stack engine allows push and pop instructions to execute in parallel. */
26253 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26254 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26255 return 0;
26256 /* FALLTHRU */
26258 case PROCESSOR_ATHLON:
26259 case PROCESSOR_K8:
26260 memory = get_attr_memory (insn);
26262 /* Model the ability of the reorder buffer to hide the latency of a load
26263 by executing it in parallel with the previous instruction when the
26264 previous instruction is not needed to compute the address. */
26265 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26266 && !ix86_agi_dependent (dep_insn, insn))
26268 enum attr_unit unit = get_attr_unit (insn);
26269 int loadcost = 3;
26271 /* Because of the difference between the length of the integer and
26272 floating unit pipeline preparation stages, the memory operands
26273 for floating point are cheaper.
26275 ??? For Athlon the difference is most probably 2. */
26276 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26277 loadcost = 3;
26278 else
26279 loadcost = TARGET_ATHLON ? 2 : 0;
26281 if (cost >= loadcost)
26282 cost -= loadcost;
26283 else
26284 cost = 0;
26286 break;
26288 case PROCESSOR_CORE2:
26289 case PROCESSOR_NEHALEM:
26290 case PROCESSOR_SANDYBRIDGE:
26291 case PROCESSOR_HASWELL:
26292 /* The stack engine allows push and pop instructions to execute in parallel. */
26293 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26294 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26295 return 0;
26297 memory = get_attr_memory (insn);
26299 /* Model the ability of the reorder buffer to hide the latency of a load
26300 by executing it in parallel with the previous instruction when the
26301 previous instruction is not needed to compute the address. */
26302 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26303 && !ix86_agi_dependent (dep_insn, insn))
26305 if (cost >= 4)
26306 cost -= 4;
26307 else
26308 cost = 0;
26310 break;
26312 case PROCESSOR_SILVERMONT:
26313 case PROCESSOR_KNL:
26314 case PROCESSOR_INTEL:
26315 if (!reload_completed)
26316 return cost;
26318 /* Increase cost of integer loads. */
26319 memory = get_attr_memory (dep_insn);
26320 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26322 enum attr_unit unit = get_attr_unit (dep_insn);
26323 if (unit == UNIT_INTEGER && cost == 1)
26325 if (memory == MEMORY_LOAD)
26326 cost = 3;
26327 else
26329 /* Increase cost of ld/st for short int types only
26330 because of store forwarding issue. */
26331 rtx set = single_set (dep_insn);
26332 if (set && (GET_MODE (SET_DEST (set)) == QImode
26333 || GET_MODE (SET_DEST (set)) == HImode))
26335 /* Increase cost of store/load insn if exact
26336 dependence exists and it is load insn. */
26337 enum attr_memory insn_memory = get_attr_memory (insn);
26338 if (insn_memory == MEMORY_LOAD
26339 && exact_store_load_dependency (dep_insn, insn))
26340 cost = 3;
26346 default:
26347 break;
26350 return cost;
26353 /* How many alternative schedules to try. This should be as wide as the
26354 scheduling freedom in the DFA, but no wider. Making this value too
26355 large results in extra work for the scheduler. */
26357 static int
26358 ia32_multipass_dfa_lookahead (void)
26360 switch (ix86_tune)
26362 case PROCESSOR_PENTIUM:
26363 return 2;
26365 case PROCESSOR_PENTIUMPRO:
26366 case PROCESSOR_K6:
26367 return 1;
26369 case PROCESSOR_BDVER1:
26370 case PROCESSOR_BDVER2:
26371 case PROCESSOR_BDVER3:
26372 case PROCESSOR_BDVER4:
26373 /* We use lookahead value 4 for BD both before and after reload
26374 schedules. The plan is to have value 8 included for -O3. */
26375 return 4;
26377 case PROCESSOR_CORE2:
26378 case PROCESSOR_NEHALEM:
26379 case PROCESSOR_SANDYBRIDGE:
26380 case PROCESSOR_HASWELL:
26381 case PROCESSOR_BONNELL:
26382 case PROCESSOR_SILVERMONT:
26383 case PROCESSOR_KNL:
26384 case PROCESSOR_INTEL:
26385 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26386 as the number of instructions that can be executed in a cycle, i.e.,
26387 issue_rate. I wonder why tuning for many CPUs does not do this. */
26388 if (reload_completed)
26389 return ix86_issue_rate ();
26390 /* Don't use lookahead for pre-reload schedule to save compile time. */
26391 return 0;
26393 default:
26394 return 0;
26398 /* Return true if target platform supports macro-fusion. */
26400 static bool
26401 ix86_macro_fusion_p ()
26403 return TARGET_FUSE_CMP_AND_BRANCH;
26406 /* Check whether the current microarchitecture supports macro fusion
26407 for the insn pair "CONDGEN + CONDJMP". Refer to the
26408 "Intel Architectures Optimization Reference Manual". */
26410 static bool
26411 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26413 rtx src, dest;
26414 enum rtx_code ccode;
26415 rtx compare_set = NULL_RTX, test_if, cond;
26416 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26418 if (!any_condjump_p (condjmp))
26419 return false;
26421 if (get_attr_type (condgen) != TYPE_TEST
26422 && get_attr_type (condgen) != TYPE_ICMP
26423 && get_attr_type (condgen) != TYPE_INCDEC
26424 && get_attr_type (condgen) != TYPE_ALU)
26425 return false;
26427 compare_set = single_set (condgen);
26428 if (compare_set == NULL_RTX
26429 && !TARGET_FUSE_ALU_AND_BRANCH)
26430 return false;
26432 if (compare_set == NULL_RTX)
26434 int i;
26435 rtx pat = PATTERN (condgen);
26436 for (i = 0; i < XVECLEN (pat, 0); i++)
26437 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26439 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26440 if (GET_CODE (set_src) == COMPARE)
26441 compare_set = XVECEXP (pat, 0, i);
26442 else
26443 alu_set = XVECEXP (pat, 0, i);
26446 if (compare_set == NULL_RTX)
26447 return false;
26448 src = SET_SRC (compare_set);
26449 if (GET_CODE (src) != COMPARE)
26450 return false;
26452 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26453 supported. */
26454 if ((MEM_P (XEXP (src, 0))
26455 && CONST_INT_P (XEXP (src, 1)))
26456 || (MEM_P (XEXP (src, 1))
26457 && CONST_INT_P (XEXP (src, 0))))
26458 return false;
26460 /* No fusion for RIP-relative address. */
26461 if (MEM_P (XEXP (src, 0)))
26462 addr = XEXP (XEXP (src, 0), 0);
26463 else if (MEM_P (XEXP (src, 1)))
26464 addr = XEXP (XEXP (src, 1), 0);
26466 if (addr) {
26467 ix86_address parts;
26468 int ok = ix86_decompose_address (addr, &parts);
26469 gcc_assert (ok);
26471 if (rip_relative_addr_p (&parts))
26472 return false;
26475 test_if = SET_SRC (pc_set (condjmp));
26476 cond = XEXP (test_if, 0);
26477 ccode = GET_CODE (cond);
26478 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26479 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26480 && (ccode == GE
26481 || ccode == GT
26482 || ccode == LE
26483 || ccode == LT))
26484 return false;
26486 /* Return true for TYPE_TEST and TYPE_ICMP. */
26487 if (get_attr_type (condgen) == TYPE_TEST
26488 || get_attr_type (condgen) == TYPE_ICMP)
26489 return true;
26491 /* The following handles the macro-fusion case for ALU + jmp. */
26492 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26493 return false;
26495 /* No fusion for alu op with memory destination operand. */
26496 dest = SET_DEST (alu_set);
26497 if (MEM_P (dest))
26498 return false;
26500 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26501 supported. */
26502 if (get_attr_type (condgen) == TYPE_INCDEC
26503 && (ccode == GEU
26504 || ccode == GTU
26505 || ccode == LEU
26506 || ccode == LTU))
26507 return false;
26509 return true;
26512 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
26513 execution. It is applied if
26514 (1) an IMUL instruction is at the top of the list;
26515 (2) there exists exactly one producer of an independent IMUL instruction
26516 in the ready list.
26517 Return the index of the IMUL producer if it was found and -1 otherwise. */
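/* For example (illustrative only): if the ready list ends with an SImode
   IMUL I, and some earlier ready insn P is the sole producer feeding a
   different, independent SImode IMUL, then P's index is returned so the
   caller can move P to the top and let Atom overlap the two multiplies
   in its pipelined IMUL unit.  */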
26518 static int
26519 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26521 rtx_insn *insn;
26522 rtx set, insn1, insn2;
26523 sd_iterator_def sd_it;
26524 dep_t dep;
26525 int index = -1;
26526 int i;
26528 if (!TARGET_BONNELL)
26529 return index;
26531 /* Check that IMUL instruction is on the top of ready list. */
26532 insn = ready[n_ready - 1];
26533 set = single_set (insn);
26534 if (!set)
26535 return index;
26536 if (!(GET_CODE (SET_SRC (set)) == MULT
26537 && GET_MODE (SET_SRC (set)) == SImode))
26538 return index;
26540 /* Search for producer of independent IMUL instruction. */
26541 for (i = n_ready - 2; i >= 0; i--)
26543 insn = ready[i];
26544 if (!NONDEBUG_INSN_P (insn))
26545 continue;
26546 /* Skip IMUL instruction. */
26547 insn2 = PATTERN (insn);
26548 if (GET_CODE (insn2) == PARALLEL)
26549 insn2 = XVECEXP (insn2, 0, 0);
26550 if (GET_CODE (insn2) == SET
26551 && GET_CODE (SET_SRC (insn2)) == MULT
26552 && GET_MODE (SET_SRC (insn2)) == SImode)
26553 continue;
26555 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26557 rtx con;
26558 con = DEP_CON (dep);
26559 if (!NONDEBUG_INSN_P (con))
26560 continue;
26561 insn1 = PATTERN (con);
26562 if (GET_CODE (insn1) == PARALLEL)
26563 insn1 = XVECEXP (insn1, 0, 0);
26565 if (GET_CODE (insn1) == SET
26566 && GET_CODE (SET_SRC (insn1)) == MULT
26567 && GET_MODE (SET_SRC (insn1)) == SImode)
26569 sd_iterator_def sd_it1;
26570 dep_t dep1;
26571 /* Check if there is no other dependee for IMUL. */
26572 index = i;
26573 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26575 rtx pro;
26576 pro = DEP_PRO (dep1);
26577 if (!NONDEBUG_INSN_P (pro))
26578 continue;
26579 if (pro != insn)
26580 index = -1;
26582 if (index >= 0)
26583 break;
26586 if (index >= 0)
26587 break;
26589 return index;
26592 /* Try to find the best candidate for the top of the ready list if two insns
26593 have the same priority - the candidate is best if its dependees were
26594 scheduled earlier. Applied to Silvermont only.
26595 Return true if the top 2 insns must be interchanged. */
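/* Illustrative example: with equal priorities, the insn whose producers
   finished on an earlier tick wins the top slot; if the ticks tie as
   well, a MEMORY_LOAD insn is preferred over a non-load.  */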
26596 static bool
26597 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26599 rtx_insn *top = ready[n_ready - 1];
26600 rtx_insn *next = ready[n_ready - 2];
26601 rtx set;
26602 sd_iterator_def sd_it;
26603 dep_t dep;
26604 int clock1 = -1;
26605 int clock2 = -1;
26606 #define INSN_TICK(INSN) (HID (INSN)->tick)
26608 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26609 return false;
26611 if (!NONDEBUG_INSN_P (top))
26612 return false;
26613 if (!NONJUMP_INSN_P (top))
26614 return false;
26615 if (!NONDEBUG_INSN_P (next))
26616 return false;
26617 if (!NONJUMP_INSN_P (next))
26618 return false;
26619 set = single_set (top);
26620 if (!set)
26621 return false;
26622 set = single_set (next);
26623 if (!set)
26624 return false;
26626 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26628 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26629 return false;
26630 /* Determine the winner more precisely. */
26631 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26633 rtx pro;
26634 pro = DEP_PRO (dep);
26635 if (!NONDEBUG_INSN_P (pro))
26636 continue;
26637 if (INSN_TICK (pro) > clock1)
26638 clock1 = INSN_TICK (pro);
26640 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26642 rtx pro;
26643 pro = DEP_PRO (dep);
26644 if (!NONDEBUG_INSN_P (pro))
26645 continue;
26646 if (INSN_TICK (pro) > clock2)
26647 clock2 = INSN_TICK (pro);
26650 if (clock1 == clock2)
26652 /* Determine the winner - a load must win. */
26653 enum attr_memory memory1, memory2;
26654 memory1 = get_attr_memory (top);
26655 memory2 = get_attr_memory (next);
26656 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26657 return true;
26659 return (bool) (clock2 < clock1);
26661 return false;
26662 #undef INSN_TICK
26665 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26666 Return the issue rate. */
26667 static int
26668 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26669 int *pn_ready, int clock_var)
26671 int issue_rate = -1;
26672 int n_ready = *pn_ready;
26673 int i;
26674 rtx_insn *insn;
26675 int index = -1;
26677 /* Set up issue rate. */
26678 issue_rate = ix86_issue_rate ();
26680 /* Do reordering for BONNELL/SILVERMONT only. */
26681 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26682 return issue_rate;
26684 /* Nothing to do if ready list contains only 1 instruction. */
26685 if (n_ready <= 1)
26686 return issue_rate;
26688 /* Do reordering for the post-reload scheduler only. */
26689 if (!reload_completed)
26690 return issue_rate;
26692 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26694 if (sched_verbose > 1)
26695 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26696 INSN_UID (ready[index]));
26698 /* Put IMUL producer (ready[index]) at the top of ready list. */
26699 insn = ready[index];
26700 for (i = index; i < n_ready - 1; i++)
26701 ready[i] = ready[i + 1];
26702 ready[n_ready - 1] = insn;
26703 return issue_rate;
26706 /* Skip selective scheduling since HID is not populated in it. */
26707 if (clock_var != 0
26708 && !sel_sched_p ()
26709 && swap_top_of_ready_list (ready, n_ready))
26711 if (sched_verbose > 1)
26712 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26713 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26714 /* Swap 2 top elements of ready list. */
26715 insn = ready[n_ready - 1];
26716 ready[n_ready - 1] = ready[n_ready - 2];
26717 ready[n_ready - 2] = insn;
26719 return issue_rate;
26722 static bool
26723 ix86_class_likely_spilled_p (reg_class_t);
26725 /* Return true if the lhs of INSN is a HW function argument register; set
26726 IS_SPILLED to true if it is a likely-spilled HW register. */
26727 static bool
26728 insn_is_function_arg (rtx insn, bool* is_spilled)
26730 rtx dst;
26732 if (!NONDEBUG_INSN_P (insn))
26733 return false;
26734 /* Call instructions are not movable; ignore them. */
26735 if (CALL_P (insn))
26736 return false;
26737 insn = PATTERN (insn);
26738 if (GET_CODE (insn) == PARALLEL)
26739 insn = XVECEXP (insn, 0, 0);
26740 if (GET_CODE (insn) != SET)
26741 return false;
26742 dst = SET_DEST (insn);
26743 if (REG_P (dst) && HARD_REGISTER_P (dst)
26744 && ix86_function_arg_regno_p (REGNO (dst)))
26746 /* Is it a likely-spilled HW register? */
26747 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26748 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26749 *is_spilled = true;
26750 return true;
26752 return false;
26755 /* Add output dependencies for a chain of adjacent function arguments, but
26756 only if there is a move to a likely-spilled HW register. Return the first
26757 argument if at least one dependence was added, or NULL otherwise. */
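/* Illustrative sketch: for a 32-bit regparm call preceded by
       mov ..., %edx
       mov ..., %ecx
       mov ..., %eax
       call foo
   the walk below starts at the call, collects the chain of argument
   moves, and (because EAX/ECX/EDX are likely-spilled registers) chains
   them with output dependencies so the scheduler keeps them together
   next to the call.  */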
26758 static rtx_insn *
26759 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26761 rtx_insn *insn;
26762 rtx_insn *last = call;
26763 rtx_insn *first_arg = NULL;
26764 bool is_spilled = false;
26766 head = PREV_INSN (head);
26768 /* Find the argument-passing instruction nearest to the call. */
26769 while (true)
26771 last = PREV_INSN (last);
26772 if (last == head)
26773 return NULL;
26774 if (!NONDEBUG_INSN_P (last))
26775 continue;
26776 if (insn_is_function_arg (last, &is_spilled))
26777 break;
26778 return NULL;
26781 first_arg = last;
26782 while (true)
26784 insn = PREV_INSN (last);
26785 if (!INSN_P (insn))
26786 break;
26787 if (insn == head)
26788 break;
26789 if (!NONDEBUG_INSN_P (insn))
26791 last = insn;
26792 continue;
26794 if (insn_is_function_arg (insn, &is_spilled))
26796 /* Add an output dependence between two function arguments if the chain
26797 of output arguments contains likely-spilled HW registers. */
26798 if (is_spilled)
26799 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26800 first_arg = last = insn;
26802 else
26803 break;
26805 if (!is_spilled)
26806 return NULL;
26807 return first_arg;
26810 /* Add output or anti dependency from insn to first_arg to restrict its code
26811 motion. */
26812 static void
26813 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26815 rtx set;
26816 rtx tmp;
26818 /* Add anti dependencies for bounds stores. */
26819 if (INSN_P (insn)
26820 && GET_CODE (PATTERN (insn)) == PARALLEL
26821 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26822 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26824 add_dependence (first_arg, insn, REG_DEP_ANTI);
26825 return;
26828 set = single_set (insn);
26829 if (!set)
26830 return;
26831 tmp = SET_DEST (set);
26832 if (REG_P (tmp))
26834 /* Add output dependency to the first function argument. */
26835 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26836 return;
26838 /* Add anti dependency. */
26839 add_dependence (first_arg, insn, REG_DEP_ANTI);
26842 /* Avoid cross-block motion of a function argument by adding a dependency
26843 from the first non-jump instruction in bb. */
26844 static void
26845 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26847 rtx_insn *insn = BB_END (bb);
26849 while (insn)
26851 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26853 rtx set = single_set (insn);
26854 if (set)
26856 avoid_func_arg_motion (arg, insn);
26857 return;
26860 if (insn == BB_HEAD (bb))
26861 return;
26862 insn = PREV_INSN (insn);
26866 /* Hook for pre-reload schedule - avoid motion of function arguments
26867 passed in likely spilled HW registers. */
26868 static void
26869 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26871 rtx_insn *insn;
26872 rtx_insn *first_arg = NULL;
26873 if (reload_completed)
26874 return;
26875 while (head != tail && DEBUG_INSN_P (head))
26876 head = NEXT_INSN (head);
26877 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26878 if (INSN_P (insn) && CALL_P (insn))
26880 first_arg = add_parameter_dependencies (insn, head);
26881 if (first_arg)
26883 /* Add a dependee for the first argument to predecessors, but only
26884 if the region contains more than one block. */
26885 basic_block bb = BLOCK_FOR_INSN (insn);
26886 int rgn = CONTAINING_RGN (bb->index);
26887 int nr_blks = RGN_NR_BLOCKS (rgn);
26888 /* Skip trivial regions and region head blocks that can have
26889 predecessors outside of region. */
26890 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26892 edge e;
26893 edge_iterator ei;
26895 /* Regions are SCCs with the exception of selective
26896 scheduling with pipelining of outer blocks enabled.
26897 So also check that immediate predecessors of a non-head
26898 block are in the same region. */
26899 FOR_EACH_EDGE (e, ei, bb->preds)
26901 /* Avoid creating loop-carried dependencies by using
26902 the topological ordering in the region. */
26903 if (rgn == CONTAINING_RGN (e->src->index)
26904 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26905 add_dependee_for_func_arg (first_arg, e->src);
26908 insn = first_arg;
26909 if (insn == head)
26910 break;
26913 else if (first_arg)
26914 avoid_func_arg_motion (first_arg, insn);
26917 /* Hook for the pre-reload schedule - set the priority of moves from likely
26918 spilled HW registers to the maximum, to schedule them as soon as possible.
26919 These are moves from function argument registers at the top of the function
26920 entry and moves from function return value registers after a call. */
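/* For instance (illustrative), a move such as "mov %edi, %ebx" at the
   top of a 64-bit function copies an incoming argument out of a
   likely-spilled hard register; the hook below bumps its priority to
   sched_max_insns_priority so it is scheduled as early as possible.  */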
26921 static int
26922 ix86_adjust_priority (rtx_insn *insn, int priority)
26924 rtx set;
26926 if (reload_completed)
26927 return priority;
26929 if (!NONDEBUG_INSN_P (insn))
26930 return priority;
26932 set = single_set (insn);
26933 if (set)
26935 rtx tmp = SET_SRC (set);
26936 if (REG_P (tmp)
26937 && HARD_REGISTER_P (tmp)
26938 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26939 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26940 return current_sched_info->sched_max_insns_priority;
26943 return priority;
26946 /* Model the decoder of Core 2/i7.
26947 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26948 track the instruction fetch block boundaries and make sure that long
26949 (9+ byte) instructions are assigned to D0. */
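/* A small worked example of the model (illustrative): with a 16-byte
   ifetch block, an insn that would not fit in the remaining bytes of
   the current block, or that is longer than the 8-byte limit of the
   secondary decoders, is masked out of the ready set for this cycle;
   at most 6 insns are accepted per block.  */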
26951 /* Maximum length of an insn that can be handled by
26952 a secondary decoder unit. '8' for Core 2/i7. */
26953 static int core2i7_secondary_decoder_max_insn_size;
26955 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26956 '16' for Core 2/i7. */
26957 static int core2i7_ifetch_block_size;
26959 /* Maximum number of instructions decoder can handle per cycle.
26960 '6' for Core 2/i7. */
26961 static int core2i7_ifetch_block_max_insns;
26963 typedef struct ix86_first_cycle_multipass_data_ *
26964 ix86_first_cycle_multipass_data_t;
26965 typedef const struct ix86_first_cycle_multipass_data_ *
26966 const_ix86_first_cycle_multipass_data_t;
26968 /* A variable to store target state across calls to max_issue within
26969 one cycle. */
26970 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26971 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26973 /* Initialize DATA. */
26974 static void
26975 core2i7_first_cycle_multipass_init (void *_data)
26977 ix86_first_cycle_multipass_data_t data
26978 = (ix86_first_cycle_multipass_data_t) _data;
26980 data->ifetch_block_len = 0;
26981 data->ifetch_block_n_insns = 0;
26982 data->ready_try_change = NULL;
26983 data->ready_try_change_size = 0;
26986 /* Advancing the cycle; reset ifetch block counts. */
26987 static void
26988 core2i7_dfa_post_advance_cycle (void)
26990 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26992 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26994 data->ifetch_block_len = 0;
26995 data->ifetch_block_n_insns = 0;
26998 static int min_insn_size (rtx_insn *);
27000 /* Filter out insns from ready_try that the core will not be able to issue
27001 on the current cycle due to decoder restrictions. */
27002 static void
27003 core2i7_first_cycle_multipass_filter_ready_try
27004 (const_ix86_first_cycle_multipass_data_t data,
27005 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27007 while (n_ready--)
27009 rtx_insn *insn;
27010 int insn_size;
27012 if (ready_try[n_ready])
27013 continue;
27015 insn = get_ready_element (n_ready);
27016 insn_size = min_insn_size (insn);
27018 if (/* If this is too long an insn for a secondary decoder ... */
27019 (!first_cycle_insn_p
27020 && insn_size > core2i7_secondary_decoder_max_insn_size)
27021 /* ... or it would not fit into the ifetch block ... */
27022 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27023 /* ... or the decoder is full already ... */
27024 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27025 /* ... mask the insn out. */
27027 ready_try[n_ready] = 1;
27029 if (data->ready_try_change)
27030 bitmap_set_bit (data->ready_try_change, n_ready);
27035 /* Prepare for a new round of multipass lookahead scheduling. */
27036 static void
27037 core2i7_first_cycle_multipass_begin (void *_data,
27038 signed char *ready_try, int n_ready,
27039 bool first_cycle_insn_p)
27041 ix86_first_cycle_multipass_data_t data
27042 = (ix86_first_cycle_multipass_data_t) _data;
27043 const_ix86_first_cycle_multipass_data_t prev_data
27044 = ix86_first_cycle_multipass_data;
27046 /* Restore the state from the end of the previous round. */
27047 data->ifetch_block_len = prev_data->ifetch_block_len;
27048 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27050 /* Filter instructions that cannot be issued on current cycle due to
27051 decoder restrictions. */
27052 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27053 first_cycle_insn_p);
27056 /* INSN is being issued in current solution. Account for its impact on
27057 the decoder model. */
27058 static void
27059 core2i7_first_cycle_multipass_issue (void *_data,
27060 signed char *ready_try, int n_ready,
27061 rtx_insn *insn, const void *_prev_data)
27063 ix86_first_cycle_multipass_data_t data
27064 = (ix86_first_cycle_multipass_data_t) _data;
27065 const_ix86_first_cycle_multipass_data_t prev_data
27066 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27068 int insn_size = min_insn_size (insn);
27070 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27071 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27072 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27073 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27075 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27076 if (!data->ready_try_change)
27078 data->ready_try_change = sbitmap_alloc (n_ready);
27079 data->ready_try_change_size = n_ready;
27081 else if (data->ready_try_change_size < n_ready)
27083 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27084 n_ready, 0);
27085 data->ready_try_change_size = n_ready;
27087 bitmap_clear (data->ready_try_change);
27089 /* Filter out insns from ready_try that the core will not be able to issue
27090 on the current cycle due to decoder restrictions. */
27091 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27092 false);
27095 /* Revert the effect on ready_try. */
27096 static void
27097 core2i7_first_cycle_multipass_backtrack (const void *_data,
27098 signed char *ready_try,
27099 int n_ready ATTRIBUTE_UNUSED)
27101 const_ix86_first_cycle_multipass_data_t data
27102 = (const_ix86_first_cycle_multipass_data_t) _data;
27103 unsigned int i = 0;
27104 sbitmap_iterator sbi;
27106 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27107 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27109 ready_try[i] = 0;
27113 /* Save the result of multipass lookahead scheduling for the next round. */
27114 static void
27115 core2i7_first_cycle_multipass_end (const void *_data)
27117 const_ix86_first_cycle_multipass_data_t data
27118 = (const_ix86_first_cycle_multipass_data_t) _data;
27119 ix86_first_cycle_multipass_data_t next_data
27120 = ix86_first_cycle_multipass_data;
27122 if (data != NULL)
27124 next_data->ifetch_block_len = data->ifetch_block_len;
27125 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27129 /* Deallocate target data. */
27130 static void
27131 core2i7_first_cycle_multipass_fini (void *_data)
27133 ix86_first_cycle_multipass_data_t data
27134 = (ix86_first_cycle_multipass_data_t) _data;
27136 if (data->ready_try_change)
27138 sbitmap_free (data->ready_try_change);
27139 data->ready_try_change = NULL;
27140 data->ready_try_change_size = 0;
27144 /* Prepare for scheduling pass. */
27145 static void
27146 ix86_sched_init_global (FILE *, int, int)
27148 /* Install scheduling hooks for current CPU. Some of these hooks are used
27149 in time-critical parts of the scheduler, so we only set them up when
27150 they are actually used. */
27151 switch (ix86_tune)
27153 case PROCESSOR_CORE2:
27154 case PROCESSOR_NEHALEM:
27155 case PROCESSOR_SANDYBRIDGE:
27156 case PROCESSOR_HASWELL:
27157 /* Do not perform multipass scheduling for pre-reload schedule
27158 to save compile time. */
27159 if (reload_completed)
27161 targetm.sched.dfa_post_advance_cycle
27162 = core2i7_dfa_post_advance_cycle;
27163 targetm.sched.first_cycle_multipass_init
27164 = core2i7_first_cycle_multipass_init;
27165 targetm.sched.first_cycle_multipass_begin
27166 = core2i7_first_cycle_multipass_begin;
27167 targetm.sched.first_cycle_multipass_issue
27168 = core2i7_first_cycle_multipass_issue;
27169 targetm.sched.first_cycle_multipass_backtrack
27170 = core2i7_first_cycle_multipass_backtrack;
27171 targetm.sched.first_cycle_multipass_end
27172 = core2i7_first_cycle_multipass_end;
27173 targetm.sched.first_cycle_multipass_fini
27174 = core2i7_first_cycle_multipass_fini;
27176 /* Set decoder parameters. */
27177 core2i7_secondary_decoder_max_insn_size = 8;
27178 core2i7_ifetch_block_size = 16;
27179 core2i7_ifetch_block_max_insns = 6;
27180 break;
27182 /* ... Fall through ... */
27183 default:
27184 targetm.sched.dfa_post_advance_cycle = NULL;
27185 targetm.sched.first_cycle_multipass_init = NULL;
27186 targetm.sched.first_cycle_multipass_begin = NULL;
27187 targetm.sched.first_cycle_multipass_issue = NULL;
27188 targetm.sched.first_cycle_multipass_backtrack = NULL;
27189 targetm.sched.first_cycle_multipass_end = NULL;
27190 targetm.sched.first_cycle_multipass_fini = NULL;
27191 break;
27196 /* Compute the alignment given to a constant that is being placed in memory.
27197 EXP is the constant and ALIGN is the alignment that the object would
27198 ordinarily have.
27199 The value of this function is used instead of that alignment to align
27200 the object. */
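/* Examples of the effect (illustrative): a DFmode constant with only
   32-bit natural alignment is raised to 64 bits, a 128-bit vector
   constant to 128 bits, and a string constant of 31 or more bytes to
   BITS_PER_WORD unless optimizing for size.  */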
27203 ix86_constant_alignment (tree exp, int align)
27205 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27206 || TREE_CODE (exp) == INTEGER_CST)
27208 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27209 return 64;
27210 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27211 return 128;
27213 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27214 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27215 return BITS_PER_WORD;
27217 return align;
27220 /* Compute the alignment for a static variable.
27221 TYPE is the data type, and ALIGN is the alignment that
27222 the object would ordinarily have. The value of this function is used
27223 instead of that alignment to align the object. */
27226 ix86_data_alignment (tree type, int align, bool opt)
27228 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27229 for symbols from other compilation units or symbols that don't need
27230 to bind locally. In order to preserve some ABI compatibility with
27231 those compilers, ensure we don't decrease alignment from what we
27232 used to assume. */
27234 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27236 /* A data structure equal to or greater than the size of a cache line
27237 (64 bytes in the Pentium 4 and other recent Intel processors, including
27238 processors based on the Intel Core microarchitecture) should be aligned
27239 so that its base address is a multiple of the cache line size. */
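/* Illustrative consequence of the two limits below: on a target with
   64-byte prefetch blocks, an aggregate of 64 bytes or more may be
   raised to 512-bit alignment, while aggregates of 32 bytes or more
   keep at least the 256-bit compatibility alignment.  */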
27241 int max_align
27242 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27244 if (max_align < BITS_PER_WORD)
27245 max_align = BITS_PER_WORD;
27247 switch (ix86_align_data_type)
27249 case ix86_align_data_type_abi: opt = false; break;
27250 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27251 case ix86_align_data_type_cacheline: break;
27254 if (opt
27255 && AGGREGATE_TYPE_P (type)
27256 && TYPE_SIZE (type)
27257 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27259 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27260 && align < max_align_compat)
27261 align = max_align_compat;
27262 if (wi::geu_p (TYPE_SIZE (type), max_align)
27263 && align < max_align)
27264 align = max_align;
27267 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27268 to a 16-byte boundary. */
27269 if (TARGET_64BIT)
27271 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27272 && TYPE_SIZE (type)
27273 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27274 && wi::geu_p (TYPE_SIZE (type), 128)
27275 && align < 128)
27276 return 128;
27279 if (!opt)
27280 return align;
27282 if (TREE_CODE (type) == ARRAY_TYPE)
27284 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27285 return 64;
27286 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27287 return 128;
27289 else if (TREE_CODE (type) == COMPLEX_TYPE)
27292 if (TYPE_MODE (type) == DCmode && align < 64)
27293 return 64;
27294 if ((TYPE_MODE (type) == XCmode
27295 || TYPE_MODE (type) == TCmode) && align < 128)
27296 return 128;
27298 else if ((TREE_CODE (type) == RECORD_TYPE
27299 || TREE_CODE (type) == UNION_TYPE
27300 || TREE_CODE (type) == QUAL_UNION_TYPE)
27301 && TYPE_FIELDS (type))
27303 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27304 return 64;
27305 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27306 return 128;
27308 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27309 || TREE_CODE (type) == INTEGER_TYPE)
27311 if (TYPE_MODE (type) == DFmode && align < 64)
27312 return 64;
27313 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27314 return 128;
27317 return align;
27320 /* Compute the alignment for a local variable or a stack slot. EXP is
27321 the data type or decl itself, MODE is the widest mode available and
27322 ALIGN is the alignment that the object would ordinarily have. The
27323 value of this macro is used instead of that alignment to align the
27324 object. */
27326 unsigned int
27327 ix86_local_alignment (tree exp, machine_mode mode,
27328 unsigned int align)
27330 tree type, decl;
27332 if (exp && DECL_P (exp))
27334 type = TREE_TYPE (exp);
27335 decl = exp;
27337 else
27339 type = exp;
27340 decl = NULL;
27343 /* Don't do dynamic stack realignment for long long objects with
27344 -mpreferred-stack-boundary=2. */
27345 if (!TARGET_64BIT
27346 && align == 64
27347 && ix86_preferred_stack_boundary < 64
27348 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27349 && (!type || !TYPE_USER_ALIGN (type))
27350 && (!decl || !DECL_USER_ALIGN (decl)))
27351 align = 32;
27353 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27354 register in MODE. We will return the larger of the XF
27355 and DF alignments. */
27356 if (!type)
27358 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27359 align = GET_MODE_ALIGNMENT (DFmode);
27360 return align;
27363 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27364 to a 16-byte boundary. The exact wording is:
27366 An array uses the same alignment as its elements, except that a local or
27367 global array variable of length at least 16 bytes or
27368 a C99 variable-length array variable always has alignment of at least 16 bytes.
27370 This was added to allow use of aligned SSE instructions on arrays. The
27371 rule is meant for static storage (where the compiler cannot do the analysis
27372 by itself). We follow it for automatic variables only when convenient.
27373 We fully control everything in the function being compiled, and functions
27374 from other units cannot rely on the alignment.
27376 Exclude the va_list type. It is the common case of a local array where
27377 we cannot benefit from the alignment.
27379 TODO: Probably one should optimize for size only when the variable does not escape. */
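/* Illustrative example under the rule above: in a 64-bit, SSE-enabled
   function optimized for speed, a local array variable of at least
   16 bytes may be given 128-bit alignment, while a va_list local is
   excluded and keeps its default alignment.  */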
27380 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27381 && TARGET_SSE)
27383 if (AGGREGATE_TYPE_P (type)
27384 && (va_list_type_node == NULL_TREE
27385 || (TYPE_MAIN_VARIANT (type)
27386 != TYPE_MAIN_VARIANT (va_list_type_node)))
27387 && TYPE_SIZE (type)
27388 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27389 && wi::geu_p (TYPE_SIZE (type), 16)
27390 && align < 128)
27391 return 128;
27393 if (TREE_CODE (type) == ARRAY_TYPE)
27395 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27396 return 64;
27397 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27398 return 128;
27400 else if (TREE_CODE (type) == COMPLEX_TYPE)
27402 if (TYPE_MODE (type) == DCmode && align < 64)
27403 return 64;
27404 if ((TYPE_MODE (type) == XCmode
27405 || TYPE_MODE (type) == TCmode) && align < 128)
27406 return 128;
27408 else if ((TREE_CODE (type) == RECORD_TYPE
27409 || TREE_CODE (type) == UNION_TYPE
27410 || TREE_CODE (type) == QUAL_UNION_TYPE)
27411 && TYPE_FIELDS (type))
27413 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27414 return 64;
27415 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27416 return 128;
27418 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27419 || TREE_CODE (type) == INTEGER_TYPE)
27422 if (TYPE_MODE (type) == DFmode && align < 64)
27423 return 64;
27424 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27425 return 128;
27427 return align;
27430 /* Compute the minimum required alignment for dynamic stack realignment
27431 purposes for a local variable, parameter or a stack slot. EXP is
27432 the data type or decl itself, MODE is its mode and ALIGN is the
27433 alignment that the object would ordinarily have. */
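/* Illustrative effect: in 32-bit code with -mpreferred-stack-boundary=2,
   a long long (DImode) local without a user-specified alignment reports
   a minimum alignment of only 32 bits, so the frame need not be
   dynamically realigned on its account.  */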
27435 unsigned int
27436 ix86_minimum_alignment (tree exp, machine_mode mode,
27437 unsigned int align)
27439 tree type, decl;
27441 if (exp && DECL_P (exp))
27443 type = TREE_TYPE (exp);
27444 decl = exp;
27446 else
27448 type = exp;
27449 decl = NULL;
27452 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27453 return align;
27455 /* Don't do dynamic stack realignment for long long objects with
27456 -mpreferred-stack-boundary=2. */
27457 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27458 && (!type || !TYPE_USER_ALIGN (type))
27459 && (!decl || !DECL_USER_ALIGN (decl)))
27460 return 32;
27462 return align;
27465 /* Find a location for the static chain incoming to a nested function.
27466 This is a register, unless all free registers are used by arguments. */
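/* Summary of the choices made below (illustrative): 64-bit code always
   uses R10; 32-bit code uses ECX by default, EAX for fastcall and
   thiscall functions, and for regparm(3) functions the chain is passed
   on the stack via an alternate entry point that pushes ESI.  */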
27468 static rtx
27469 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27471 unsigned regno;
27473 /* While this function won't be called by the middle-end when a static
27474 chain isn't needed, it's also used throughout the backend so it's
27475 easiest to keep this check centralized. */
27476 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27477 return NULL;
27479 if (TARGET_64BIT)
27481 /* We always use R10 in 64-bit mode. */
27482 regno = R10_REG;
27484 else
27486 const_tree fntype, fndecl;
27487 unsigned int ccvt;
27489 /* By default in 32-bit mode we use ECX to pass the static chain. */
27490 regno = CX_REG;
27492 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27494 fntype = TREE_TYPE (fndecl_or_type);
27495 fndecl = fndecl_or_type;
27497 else
27499 fntype = fndecl_or_type;
27500 fndecl = NULL;
27503 ccvt = ix86_get_callcvt (fntype);
27504 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27506 /* Fastcall functions use ecx/edx for arguments, which leaves
27507 us with EAX for the static chain.
27508 Thiscall functions use ecx for arguments, which also
27509 leaves us with EAX for the static chain. */
27510 regno = AX_REG;
27512 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27514 /* Thiscall functions use ecx for arguments, which leaves
27515 us with EAX and EDX for the static chain.
27516 We use EAX for ABI compatibility. */
27517 regno = AX_REG;
27519 else if (ix86_function_regparm (fntype, fndecl) == 3)
27521 /* For regparm 3, we have no free call-clobbered registers in
27522 which to store the static chain. In order to implement this,
27523 we have the trampoline push the static chain to the stack.
27524 However, we can't push a value below the return address when
27525 we call the nested function directly, so we have to use an
27526 alternate entry point. For this we use ESI, and have the
27527 alternate entry point push ESI, so that things appear the
27528 same once we're executing the nested function. */
27529 if (incoming_p)
27531 if (fndecl == current_function_decl)
27532 ix86_static_chain_on_stack = true;
27533 return gen_frame_mem (SImode,
27534 plus_constant (Pmode,
27535 arg_pointer_rtx, -8));
27537 regno = SI_REG;
27541 return gen_rtx_REG (Pmode, regno);
27544 /* Emit RTL insns to initialize the variable parts of a trampoline.
27545 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27546 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27547 to be passed to the target function. */
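/* For reference, the 64-bit trampoline emitted below looks roughly like
   the following (a sketch; the shorter movl forms are used when the
   operands fit in 32 bits):
       49 bb <imm64>      movabs $fnaddr, %r11
       49 ba <imm64>      movabs $chain,  %r10
       49 ff e3 90        jmp *%r11 ; nop (pads to a 32-bit store)
   The 32-bit trampoline is a mov (or push) of the static chain followed
   by a relative jmp to the target.  */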
27549 static void
27550 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27552 rtx mem, fnaddr;
27553 int opcode;
27554 int offset = 0;
27556 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27558 if (TARGET_64BIT)
27560 int size;
27562 /* Load the function address into r11. Try to load the address using
27563 the shorter movl instead of movabs. We may want to support
27564 movq for kernel mode, but the kernel does not use trampolines at
27565 the moment. FNADDR is a 32-bit address and may not be in
27566 DImode when ptr_mode == SImode. Always use movl in this
27567 case. */
27568 if (ptr_mode == SImode
27569 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27571 fnaddr = copy_addr_to_reg (fnaddr);
27573 mem = adjust_address (m_tramp, HImode, offset);
27574 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27576 mem = adjust_address (m_tramp, SImode, offset + 2);
27577 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27578 offset += 6;
27580 else
27582 mem = adjust_address (m_tramp, HImode, offset);
27583 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27585 mem = adjust_address (m_tramp, DImode, offset + 2);
27586 emit_move_insn (mem, fnaddr);
27587 offset += 10;
27590 /* Load the static chain into r10 using movabs. Use the shorter movl
27591 instead of movabs when ptr_mode == SImode. */
27592 if (ptr_mode == SImode)
27594 opcode = 0xba41;
27595 size = 6;
27597 else
27599 opcode = 0xba49;
27600 size = 10;
27603 mem = adjust_address (m_tramp, HImode, offset);
27604 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27606 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27607 emit_move_insn (mem, chain_value);
27608 offset += size;
27610 /* Jump to r11; the last (unused) byte is a nop, only there to
27611 pad the write out to a single 32-bit store. */
27612 mem = adjust_address (m_tramp, SImode, offset);
27613 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27614 offset += 4;
27616 else
27618 rtx disp, chain;
27620 /* Depending on the static chain location, either load a register
27621 with a constant, or push the constant to the stack. All of the
27622 instructions are the same size. */
27623 chain = ix86_static_chain (fndecl, true);
27624 if (REG_P (chain))
27626 switch (REGNO (chain))
27628 case AX_REG:
27629 opcode = 0xb8; break;
27630 case CX_REG:
27631 opcode = 0xb9; break;
27632 default:
27633 gcc_unreachable ();
27636 else
27637 opcode = 0x68;
27639 mem = adjust_address (m_tramp, QImode, offset);
27640 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27642 mem = adjust_address (m_tramp, SImode, offset + 1);
27643 emit_move_insn (mem, chain_value);
27644 offset += 5;
27646 mem = adjust_address (m_tramp, QImode, offset);
27647 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27649 mem = adjust_address (m_tramp, SImode, offset + 1);
27651 /* Compute the offset from the end of the jmp to the target function.
27652 When the trampoline stores the static chain on
27653 the stack, we need to skip the first insn, which pushes the
27654 (call-saved) static chain register; this push is 1 byte. */
27655 offset += 5;
27656 disp = expand_binop (SImode, sub_optab, fnaddr,
27657 plus_constant (Pmode, XEXP (m_tramp, 0),
27658 offset - (MEM_P (chain) ? 1 : 0)),
27659 NULL_RTX, 1, OPTAB_DIRECT);
27660 emit_move_insn (mem, disp);
27663 gcc_assert (offset <= TRAMPOLINE_SIZE);
27665 #ifdef HAVE_ENABLE_EXECUTE_STACK
27666 #ifdef CHECK_EXECUTE_STACK_ENABLED
27667 if (CHECK_EXECUTE_STACK_ENABLED)
27668 #endif
27669 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27670 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27671 #endif
27674 /* The following file contains several enumerations and data structures
27675 built from the definitions in i386-builtin-types.def. */
27677 #include "i386-builtin-types.inc"
27679 /* Table for the ix86 builtin non-function types. */
27680 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27682 /* Retrieve an element from the above table, building some of
27683 the types lazily. */
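/* Illustrative examples of the lazy construction below: a vector type
   code is built by applying build_vector_type_for_mode to its recorded
   base element type and machine mode; a pointer type code wraps its
   base type with build_pointer_type, adding a const qualifier for the
   CPTR codes.  */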
27685 static tree
27686 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27688 unsigned int index;
27689 tree type, itype;
27691 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27693 type = ix86_builtin_type_tab[(int) tcode];
27694 if (type != NULL)
27695 return type;
27697 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27698 if (tcode <= IX86_BT_LAST_VECT)
27700 machine_mode mode;
27702 index = tcode - IX86_BT_LAST_PRIM - 1;
27703 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27704 mode = ix86_builtin_type_vect_mode[index];
27706 type = build_vector_type_for_mode (itype, mode);
27708 else
27710 int quals;
27712 index = tcode - IX86_BT_LAST_VECT - 1;
27713 if (tcode <= IX86_BT_LAST_PTR)
27714 quals = TYPE_UNQUALIFIED;
27715 else
27716 quals = TYPE_QUAL_CONST;
27718 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27719 if (quals != TYPE_UNQUALIFIED)
27720 itype = build_qualified_type (itype, quals);
27722 type = build_pointer_type (itype);
27725 ix86_builtin_type_tab[(int) tcode] = type;
27726 return type;
27729 /* Table for the ix86 builtin function types. */
27730 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27732 /* Retrieve an element from the above table, building some of
27733 the types lazily. */
27735 static tree
27736 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27738 tree type;
27740 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27742 type = ix86_builtin_func_type_tab[(int) tcode];
27743 if (type != NULL)
27744 return type;
27746 if (tcode <= IX86_BT_LAST_FUNC)
27748 unsigned start = ix86_builtin_func_start[(int) tcode];
27749 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27750 tree rtype, atype, args = void_list_node;
27751 unsigned i;
27753 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27754 for (i = after - 1; i > start; --i)
27756 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27757 args = tree_cons (NULL, atype, args);
27760 type = build_function_type (rtype, args);
27762 else
27764 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27765 enum ix86_builtin_func_type icode;
27767 icode = ix86_builtin_func_alias_base[index];
27768 type = ix86_get_builtin_func_type (icode);
27771 ix86_builtin_func_type_tab[(int) tcode] = type;
27772 return type;
27776 /* Codes for all the SSE/MMX builtins. */
27777 enum ix86_builtins
27779 IX86_BUILTIN_ADDPS,
27780 IX86_BUILTIN_ADDSS,
27781 IX86_BUILTIN_DIVPS,
27782 IX86_BUILTIN_DIVSS,
27783 IX86_BUILTIN_MULPS,
27784 IX86_BUILTIN_MULSS,
27785 IX86_BUILTIN_SUBPS,
27786 IX86_BUILTIN_SUBSS,
27788 IX86_BUILTIN_CMPEQPS,
27789 IX86_BUILTIN_CMPLTPS,
27790 IX86_BUILTIN_CMPLEPS,
27791 IX86_BUILTIN_CMPGTPS,
27792 IX86_BUILTIN_CMPGEPS,
27793 IX86_BUILTIN_CMPNEQPS,
27794 IX86_BUILTIN_CMPNLTPS,
27795 IX86_BUILTIN_CMPNLEPS,
27796 IX86_BUILTIN_CMPNGTPS,
27797 IX86_BUILTIN_CMPNGEPS,
27798 IX86_BUILTIN_CMPORDPS,
27799 IX86_BUILTIN_CMPUNORDPS,
27800 IX86_BUILTIN_CMPEQSS,
27801 IX86_BUILTIN_CMPLTSS,
27802 IX86_BUILTIN_CMPLESS,
27803 IX86_BUILTIN_CMPNEQSS,
27804 IX86_BUILTIN_CMPNLTSS,
27805 IX86_BUILTIN_CMPNLESS,
27806 IX86_BUILTIN_CMPORDSS,
27807 IX86_BUILTIN_CMPUNORDSS,
27809 IX86_BUILTIN_COMIEQSS,
27810 IX86_BUILTIN_COMILTSS,
27811 IX86_BUILTIN_COMILESS,
27812 IX86_BUILTIN_COMIGTSS,
27813 IX86_BUILTIN_COMIGESS,
27814 IX86_BUILTIN_COMINEQSS,
27815 IX86_BUILTIN_UCOMIEQSS,
27816 IX86_BUILTIN_UCOMILTSS,
27817 IX86_BUILTIN_UCOMILESS,
27818 IX86_BUILTIN_UCOMIGTSS,
27819 IX86_BUILTIN_UCOMIGESS,
27820 IX86_BUILTIN_UCOMINEQSS,
27822 IX86_BUILTIN_CVTPI2PS,
27823 IX86_BUILTIN_CVTPS2PI,
27824 IX86_BUILTIN_CVTSI2SS,
27825 IX86_BUILTIN_CVTSI642SS,
27826 IX86_BUILTIN_CVTSS2SI,
27827 IX86_BUILTIN_CVTSS2SI64,
27828 IX86_BUILTIN_CVTTPS2PI,
27829 IX86_BUILTIN_CVTTSS2SI,
27830 IX86_BUILTIN_CVTTSS2SI64,
27832 IX86_BUILTIN_MAXPS,
27833 IX86_BUILTIN_MAXSS,
27834 IX86_BUILTIN_MINPS,
27835 IX86_BUILTIN_MINSS,
27837 IX86_BUILTIN_LOADUPS,
27838 IX86_BUILTIN_STOREUPS,
27839 IX86_BUILTIN_MOVSS,
27841 IX86_BUILTIN_MOVHLPS,
27842 IX86_BUILTIN_MOVLHPS,
27843 IX86_BUILTIN_LOADHPS,
27844 IX86_BUILTIN_LOADLPS,
27845 IX86_BUILTIN_STOREHPS,
27846 IX86_BUILTIN_STORELPS,
27848 IX86_BUILTIN_MASKMOVQ,
27849 IX86_BUILTIN_MOVMSKPS,
27850 IX86_BUILTIN_PMOVMSKB,
27852 IX86_BUILTIN_MOVNTPS,
27853 IX86_BUILTIN_MOVNTQ,
27855 IX86_BUILTIN_LOADDQU,
27856 IX86_BUILTIN_STOREDQU,
27858 IX86_BUILTIN_PACKSSWB,
27859 IX86_BUILTIN_PACKSSDW,
27860 IX86_BUILTIN_PACKUSWB,
27862 IX86_BUILTIN_PADDB,
27863 IX86_BUILTIN_PADDW,
27864 IX86_BUILTIN_PADDD,
27865 IX86_BUILTIN_PADDQ,
27866 IX86_BUILTIN_PADDSB,
27867 IX86_BUILTIN_PADDSW,
27868 IX86_BUILTIN_PADDUSB,
27869 IX86_BUILTIN_PADDUSW,
27870 IX86_BUILTIN_PSUBB,
27871 IX86_BUILTIN_PSUBW,
27872 IX86_BUILTIN_PSUBD,
27873 IX86_BUILTIN_PSUBQ,
27874 IX86_BUILTIN_PSUBSB,
27875 IX86_BUILTIN_PSUBSW,
27876 IX86_BUILTIN_PSUBUSB,
27877 IX86_BUILTIN_PSUBUSW,
27879 IX86_BUILTIN_PAND,
27880 IX86_BUILTIN_PANDN,
27881 IX86_BUILTIN_POR,
27882 IX86_BUILTIN_PXOR,
27884 IX86_BUILTIN_PAVGB,
27885 IX86_BUILTIN_PAVGW,
27887 IX86_BUILTIN_PCMPEQB,
27888 IX86_BUILTIN_PCMPEQW,
27889 IX86_BUILTIN_PCMPEQD,
27890 IX86_BUILTIN_PCMPGTB,
27891 IX86_BUILTIN_PCMPGTW,
27892 IX86_BUILTIN_PCMPGTD,
27894 IX86_BUILTIN_PMADDWD,
27896 IX86_BUILTIN_PMAXSW,
27897 IX86_BUILTIN_PMAXUB,
27898 IX86_BUILTIN_PMINSW,
27899 IX86_BUILTIN_PMINUB,
27901 IX86_BUILTIN_PMULHUW,
27902 IX86_BUILTIN_PMULHW,
27903 IX86_BUILTIN_PMULLW,
27905 IX86_BUILTIN_PSADBW,
27906 IX86_BUILTIN_PSHUFW,
27908 IX86_BUILTIN_PSLLW,
27909 IX86_BUILTIN_PSLLD,
27910 IX86_BUILTIN_PSLLQ,
27911 IX86_BUILTIN_PSRAW,
27912 IX86_BUILTIN_PSRAD,
27913 IX86_BUILTIN_PSRLW,
27914 IX86_BUILTIN_PSRLD,
27915 IX86_BUILTIN_PSRLQ,
27916 IX86_BUILTIN_PSLLWI,
27917 IX86_BUILTIN_PSLLDI,
27918 IX86_BUILTIN_PSLLQI,
27919 IX86_BUILTIN_PSRAWI,
27920 IX86_BUILTIN_PSRADI,
27921 IX86_BUILTIN_PSRLWI,
27922 IX86_BUILTIN_PSRLDI,
27923 IX86_BUILTIN_PSRLQI,
27925 IX86_BUILTIN_PUNPCKHBW,
27926 IX86_BUILTIN_PUNPCKHWD,
27927 IX86_BUILTIN_PUNPCKHDQ,
27928 IX86_BUILTIN_PUNPCKLBW,
27929 IX86_BUILTIN_PUNPCKLWD,
27930 IX86_BUILTIN_PUNPCKLDQ,
27932 IX86_BUILTIN_SHUFPS,
27934 IX86_BUILTIN_RCPPS,
27935 IX86_BUILTIN_RCPSS,
27936 IX86_BUILTIN_RSQRTPS,
27937 IX86_BUILTIN_RSQRTPS_NR,
27938 IX86_BUILTIN_RSQRTSS,
27939 IX86_BUILTIN_RSQRTF,
27940 IX86_BUILTIN_SQRTPS,
27941 IX86_BUILTIN_SQRTPS_NR,
27942 IX86_BUILTIN_SQRTSS,
27944 IX86_BUILTIN_UNPCKHPS,
27945 IX86_BUILTIN_UNPCKLPS,
27947 IX86_BUILTIN_ANDPS,
27948 IX86_BUILTIN_ANDNPS,
27949 IX86_BUILTIN_ORPS,
27950 IX86_BUILTIN_XORPS,
27952 IX86_BUILTIN_EMMS,
27953 IX86_BUILTIN_LDMXCSR,
27954 IX86_BUILTIN_STMXCSR,
27955 IX86_BUILTIN_SFENCE,
27957 IX86_BUILTIN_FXSAVE,
27958 IX86_BUILTIN_FXRSTOR,
27959 IX86_BUILTIN_FXSAVE64,
27960 IX86_BUILTIN_FXRSTOR64,
27962 IX86_BUILTIN_XSAVE,
27963 IX86_BUILTIN_XRSTOR,
27964 IX86_BUILTIN_XSAVE64,
27965 IX86_BUILTIN_XRSTOR64,
27967 IX86_BUILTIN_XSAVEOPT,
27968 IX86_BUILTIN_XSAVEOPT64,
27970 IX86_BUILTIN_XSAVEC,
27971 IX86_BUILTIN_XSAVEC64,
27973 IX86_BUILTIN_XSAVES,
27974 IX86_BUILTIN_XRSTORS,
27975 IX86_BUILTIN_XSAVES64,
27976 IX86_BUILTIN_XRSTORS64,
27978 /* 3DNow! Original */
27979 IX86_BUILTIN_FEMMS,
27980 IX86_BUILTIN_PAVGUSB,
27981 IX86_BUILTIN_PF2ID,
27982 IX86_BUILTIN_PFACC,
27983 IX86_BUILTIN_PFADD,
27984 IX86_BUILTIN_PFCMPEQ,
27985 IX86_BUILTIN_PFCMPGE,
27986 IX86_BUILTIN_PFCMPGT,
27987 IX86_BUILTIN_PFMAX,
27988 IX86_BUILTIN_PFMIN,
27989 IX86_BUILTIN_PFMUL,
27990 IX86_BUILTIN_PFRCP,
27991 IX86_BUILTIN_PFRCPIT1,
27992 IX86_BUILTIN_PFRCPIT2,
27993 IX86_BUILTIN_PFRSQIT1,
27994 IX86_BUILTIN_PFRSQRT,
27995 IX86_BUILTIN_PFSUB,
27996 IX86_BUILTIN_PFSUBR,
27997 IX86_BUILTIN_PI2FD,
27998 IX86_BUILTIN_PMULHRW,
28000 /* 3DNow! Athlon Extensions */
28001 IX86_BUILTIN_PF2IW,
28002 IX86_BUILTIN_PFNACC,
28003 IX86_BUILTIN_PFPNACC,
28004 IX86_BUILTIN_PI2FW,
28005 IX86_BUILTIN_PSWAPDSI,
28006 IX86_BUILTIN_PSWAPDSF,
28008 /* SSE2 */
28009 IX86_BUILTIN_ADDPD,
28010 IX86_BUILTIN_ADDSD,
28011 IX86_BUILTIN_DIVPD,
28012 IX86_BUILTIN_DIVSD,
28013 IX86_BUILTIN_MULPD,
28014 IX86_BUILTIN_MULSD,
28015 IX86_BUILTIN_SUBPD,
28016 IX86_BUILTIN_SUBSD,
28018 IX86_BUILTIN_CMPEQPD,
28019 IX86_BUILTIN_CMPLTPD,
28020 IX86_BUILTIN_CMPLEPD,
28021 IX86_BUILTIN_CMPGTPD,
28022 IX86_BUILTIN_CMPGEPD,
28023 IX86_BUILTIN_CMPNEQPD,
28024 IX86_BUILTIN_CMPNLTPD,
28025 IX86_BUILTIN_CMPNLEPD,
28026 IX86_BUILTIN_CMPNGTPD,
28027 IX86_BUILTIN_CMPNGEPD,
28028 IX86_BUILTIN_CMPORDPD,
28029 IX86_BUILTIN_CMPUNORDPD,
28030 IX86_BUILTIN_CMPEQSD,
28031 IX86_BUILTIN_CMPLTSD,
28032 IX86_BUILTIN_CMPLESD,
28033 IX86_BUILTIN_CMPNEQSD,
28034 IX86_BUILTIN_CMPNLTSD,
28035 IX86_BUILTIN_CMPNLESD,
28036 IX86_BUILTIN_CMPORDSD,
28037 IX86_BUILTIN_CMPUNORDSD,
28039 IX86_BUILTIN_COMIEQSD,
28040 IX86_BUILTIN_COMILTSD,
28041 IX86_BUILTIN_COMILESD,
28042 IX86_BUILTIN_COMIGTSD,
28043 IX86_BUILTIN_COMIGESD,
28044 IX86_BUILTIN_COMINEQSD,
28045 IX86_BUILTIN_UCOMIEQSD,
28046 IX86_BUILTIN_UCOMILTSD,
28047 IX86_BUILTIN_UCOMILESD,
28048 IX86_BUILTIN_UCOMIGTSD,
28049 IX86_BUILTIN_UCOMIGESD,
28050 IX86_BUILTIN_UCOMINEQSD,
28052 IX86_BUILTIN_MAXPD,
28053 IX86_BUILTIN_MAXSD,
28054 IX86_BUILTIN_MINPD,
28055 IX86_BUILTIN_MINSD,
28057 IX86_BUILTIN_ANDPD,
28058 IX86_BUILTIN_ANDNPD,
28059 IX86_BUILTIN_ORPD,
28060 IX86_BUILTIN_XORPD,
28062 IX86_BUILTIN_SQRTPD,
28063 IX86_BUILTIN_SQRTSD,
28065 IX86_BUILTIN_UNPCKHPD,
28066 IX86_BUILTIN_UNPCKLPD,
28068 IX86_BUILTIN_SHUFPD,
28070 IX86_BUILTIN_LOADUPD,
28071 IX86_BUILTIN_STOREUPD,
28072 IX86_BUILTIN_MOVSD,
28074 IX86_BUILTIN_LOADHPD,
28075 IX86_BUILTIN_LOADLPD,
28077 IX86_BUILTIN_CVTDQ2PD,
28078 IX86_BUILTIN_CVTDQ2PS,
28080 IX86_BUILTIN_CVTPD2DQ,
28081 IX86_BUILTIN_CVTPD2PI,
28082 IX86_BUILTIN_CVTPD2PS,
28083 IX86_BUILTIN_CVTTPD2DQ,
28084 IX86_BUILTIN_CVTTPD2PI,
28086 IX86_BUILTIN_CVTPI2PD,
28087 IX86_BUILTIN_CVTSI2SD,
28088 IX86_BUILTIN_CVTSI642SD,
28090 IX86_BUILTIN_CVTSD2SI,
28091 IX86_BUILTIN_CVTSD2SI64,
28092 IX86_BUILTIN_CVTSD2SS,
28093 IX86_BUILTIN_CVTSS2SD,
28094 IX86_BUILTIN_CVTTSD2SI,
28095 IX86_BUILTIN_CVTTSD2SI64,
28097 IX86_BUILTIN_CVTPS2DQ,
28098 IX86_BUILTIN_CVTPS2PD,
28099 IX86_BUILTIN_CVTTPS2DQ,
28101 IX86_BUILTIN_MOVNTI,
28102 IX86_BUILTIN_MOVNTI64,
28103 IX86_BUILTIN_MOVNTPD,
28104 IX86_BUILTIN_MOVNTDQ,
28106 IX86_BUILTIN_MOVQ128,
28108 /* SSE2 MMX */
28109 IX86_BUILTIN_MASKMOVDQU,
28110 IX86_BUILTIN_MOVMSKPD,
28111 IX86_BUILTIN_PMOVMSKB128,
28113 IX86_BUILTIN_PACKSSWB128,
28114 IX86_BUILTIN_PACKSSDW128,
28115 IX86_BUILTIN_PACKUSWB128,
28117 IX86_BUILTIN_PADDB128,
28118 IX86_BUILTIN_PADDW128,
28119 IX86_BUILTIN_PADDD128,
28120 IX86_BUILTIN_PADDQ128,
28121 IX86_BUILTIN_PADDSB128,
28122 IX86_BUILTIN_PADDSW128,
28123 IX86_BUILTIN_PADDUSB128,
28124 IX86_BUILTIN_PADDUSW128,
28125 IX86_BUILTIN_PSUBB128,
28126 IX86_BUILTIN_PSUBW128,
28127 IX86_BUILTIN_PSUBD128,
28128 IX86_BUILTIN_PSUBQ128,
28129 IX86_BUILTIN_PSUBSB128,
28130 IX86_BUILTIN_PSUBSW128,
28131 IX86_BUILTIN_PSUBUSB128,
28132 IX86_BUILTIN_PSUBUSW128,
28134 IX86_BUILTIN_PAND128,
28135 IX86_BUILTIN_PANDN128,
28136 IX86_BUILTIN_POR128,
28137 IX86_BUILTIN_PXOR128,
28139 IX86_BUILTIN_PAVGB128,
28140 IX86_BUILTIN_PAVGW128,
28142 IX86_BUILTIN_PCMPEQB128,
28143 IX86_BUILTIN_PCMPEQW128,
28144 IX86_BUILTIN_PCMPEQD128,
28145 IX86_BUILTIN_PCMPGTB128,
28146 IX86_BUILTIN_PCMPGTW128,
28147 IX86_BUILTIN_PCMPGTD128,
28149 IX86_BUILTIN_PMADDWD128,
28151 IX86_BUILTIN_PMAXSW128,
28152 IX86_BUILTIN_PMAXUB128,
28153 IX86_BUILTIN_PMINSW128,
28154 IX86_BUILTIN_PMINUB128,
28156 IX86_BUILTIN_PMULUDQ,
28157 IX86_BUILTIN_PMULUDQ128,
28158 IX86_BUILTIN_PMULHUW128,
28159 IX86_BUILTIN_PMULHW128,
28160 IX86_BUILTIN_PMULLW128,
28162 IX86_BUILTIN_PSADBW128,
28163 IX86_BUILTIN_PSHUFHW,
28164 IX86_BUILTIN_PSHUFLW,
28165 IX86_BUILTIN_PSHUFD,
28167 IX86_BUILTIN_PSLLDQI128,
28168 IX86_BUILTIN_PSLLWI128,
28169 IX86_BUILTIN_PSLLDI128,
28170 IX86_BUILTIN_PSLLQI128,
28171 IX86_BUILTIN_PSRAWI128,
28172 IX86_BUILTIN_PSRADI128,
28173 IX86_BUILTIN_PSRLDQI128,
28174 IX86_BUILTIN_PSRLWI128,
28175 IX86_BUILTIN_PSRLDI128,
28176 IX86_BUILTIN_PSRLQI128,
28178 IX86_BUILTIN_PSLLDQ128,
28179 IX86_BUILTIN_PSLLW128,
28180 IX86_BUILTIN_PSLLD128,
28181 IX86_BUILTIN_PSLLQ128,
28182 IX86_BUILTIN_PSRAW128,
28183 IX86_BUILTIN_PSRAD128,
28184 IX86_BUILTIN_PSRLW128,
28185 IX86_BUILTIN_PSRLD128,
28186 IX86_BUILTIN_PSRLQ128,
28188 IX86_BUILTIN_PUNPCKHBW128,
28189 IX86_BUILTIN_PUNPCKHWD128,
28190 IX86_BUILTIN_PUNPCKHDQ128,
28191 IX86_BUILTIN_PUNPCKHQDQ128,
28192 IX86_BUILTIN_PUNPCKLBW128,
28193 IX86_BUILTIN_PUNPCKLWD128,
28194 IX86_BUILTIN_PUNPCKLDQ128,
28195 IX86_BUILTIN_PUNPCKLQDQ128,
28197 IX86_BUILTIN_CLFLUSH,
28198 IX86_BUILTIN_MFENCE,
28199 IX86_BUILTIN_LFENCE,
28200 IX86_BUILTIN_PAUSE,
28202 IX86_BUILTIN_FNSTENV,
28203 IX86_BUILTIN_FLDENV,
28204 IX86_BUILTIN_FNSTSW,
28205 IX86_BUILTIN_FNCLEX,
28207 IX86_BUILTIN_BSRSI,
28208 IX86_BUILTIN_BSRDI,
28209 IX86_BUILTIN_RDPMC,
28210 IX86_BUILTIN_RDTSC,
28211 IX86_BUILTIN_RDTSCP,
28212 IX86_BUILTIN_ROLQI,
28213 IX86_BUILTIN_ROLHI,
28214 IX86_BUILTIN_RORQI,
28215 IX86_BUILTIN_RORHI,
28217 /* SSE3. */
28218 IX86_BUILTIN_ADDSUBPS,
28219 IX86_BUILTIN_HADDPS,
28220 IX86_BUILTIN_HSUBPS,
28221 IX86_BUILTIN_MOVSHDUP,
28222 IX86_BUILTIN_MOVSLDUP,
28223 IX86_BUILTIN_ADDSUBPD,
28224 IX86_BUILTIN_HADDPD,
28225 IX86_BUILTIN_HSUBPD,
28226 IX86_BUILTIN_LDDQU,
28228 IX86_BUILTIN_MONITOR,
28229 IX86_BUILTIN_MWAIT,
28231 /* SSSE3. */
28232 IX86_BUILTIN_PHADDW,
28233 IX86_BUILTIN_PHADDD,
28234 IX86_BUILTIN_PHADDSW,
28235 IX86_BUILTIN_PHSUBW,
28236 IX86_BUILTIN_PHSUBD,
28237 IX86_BUILTIN_PHSUBSW,
28238 IX86_BUILTIN_PMADDUBSW,
28239 IX86_BUILTIN_PMULHRSW,
28240 IX86_BUILTIN_PSHUFB,
28241 IX86_BUILTIN_PSIGNB,
28242 IX86_BUILTIN_PSIGNW,
28243 IX86_BUILTIN_PSIGND,
28244 IX86_BUILTIN_PALIGNR,
28245 IX86_BUILTIN_PABSB,
28246 IX86_BUILTIN_PABSW,
28247 IX86_BUILTIN_PABSD,
28249 IX86_BUILTIN_PHADDW128,
28250 IX86_BUILTIN_PHADDD128,
28251 IX86_BUILTIN_PHADDSW128,
28252 IX86_BUILTIN_PHSUBW128,
28253 IX86_BUILTIN_PHSUBD128,
28254 IX86_BUILTIN_PHSUBSW128,
28255 IX86_BUILTIN_PMADDUBSW128,
28256 IX86_BUILTIN_PMULHRSW128,
28257 IX86_BUILTIN_PSHUFB128,
28258 IX86_BUILTIN_PSIGNB128,
28259 IX86_BUILTIN_PSIGNW128,
28260 IX86_BUILTIN_PSIGND128,
28261 IX86_BUILTIN_PALIGNR128,
28262 IX86_BUILTIN_PABSB128,
28263 IX86_BUILTIN_PABSW128,
28264 IX86_BUILTIN_PABSD128,
28266 /* AMDFAM10 - SSE4A New Instructions. */
28267 IX86_BUILTIN_MOVNTSD,
28268 IX86_BUILTIN_MOVNTSS,
28269 IX86_BUILTIN_EXTRQI,
28270 IX86_BUILTIN_EXTRQ,
28271 IX86_BUILTIN_INSERTQI,
28272 IX86_BUILTIN_INSERTQ,
28274 /* SSE4.1. */
28275 IX86_BUILTIN_BLENDPD,
28276 IX86_BUILTIN_BLENDPS,
28277 IX86_BUILTIN_BLENDVPD,
28278 IX86_BUILTIN_BLENDVPS,
28279 IX86_BUILTIN_PBLENDVB128,
28280 IX86_BUILTIN_PBLENDW128,
28282 IX86_BUILTIN_DPPD,
28283 IX86_BUILTIN_DPPS,
28285 IX86_BUILTIN_INSERTPS128,
28287 IX86_BUILTIN_MOVNTDQA,
28288 IX86_BUILTIN_MPSADBW128,
28289 IX86_BUILTIN_PACKUSDW128,
28290 IX86_BUILTIN_PCMPEQQ,
28291 IX86_BUILTIN_PHMINPOSUW128,
28293 IX86_BUILTIN_PMAXSB128,
28294 IX86_BUILTIN_PMAXSD128,
28295 IX86_BUILTIN_PMAXUD128,
28296 IX86_BUILTIN_PMAXUW128,
28298 IX86_BUILTIN_PMINSB128,
28299 IX86_BUILTIN_PMINSD128,
28300 IX86_BUILTIN_PMINUD128,
28301 IX86_BUILTIN_PMINUW128,
28303 IX86_BUILTIN_PMOVSXBW128,
28304 IX86_BUILTIN_PMOVSXBD128,
28305 IX86_BUILTIN_PMOVSXBQ128,
28306 IX86_BUILTIN_PMOVSXWD128,
28307 IX86_BUILTIN_PMOVSXWQ128,
28308 IX86_BUILTIN_PMOVSXDQ128,
28310 IX86_BUILTIN_PMOVZXBW128,
28311 IX86_BUILTIN_PMOVZXBD128,
28312 IX86_BUILTIN_PMOVZXBQ128,
28313 IX86_BUILTIN_PMOVZXWD128,
28314 IX86_BUILTIN_PMOVZXWQ128,
28315 IX86_BUILTIN_PMOVZXDQ128,
28317 IX86_BUILTIN_PMULDQ128,
28318 IX86_BUILTIN_PMULLD128,
28320 IX86_BUILTIN_ROUNDSD,
28321 IX86_BUILTIN_ROUNDSS,
28323 IX86_BUILTIN_ROUNDPD,
28324 IX86_BUILTIN_ROUNDPS,
28326 IX86_BUILTIN_FLOORPD,
28327 IX86_BUILTIN_CEILPD,
28328 IX86_BUILTIN_TRUNCPD,
28329 IX86_BUILTIN_RINTPD,
28330 IX86_BUILTIN_ROUNDPD_AZ,
28332 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28333 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28334 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28336 IX86_BUILTIN_FLOORPS,
28337 IX86_BUILTIN_CEILPS,
28338 IX86_BUILTIN_TRUNCPS,
28339 IX86_BUILTIN_RINTPS,
28340 IX86_BUILTIN_ROUNDPS_AZ,
28342 IX86_BUILTIN_FLOORPS_SFIX,
28343 IX86_BUILTIN_CEILPS_SFIX,
28344 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28346 IX86_BUILTIN_PTESTZ,
28347 IX86_BUILTIN_PTESTC,
28348 IX86_BUILTIN_PTESTNZC,
28350 IX86_BUILTIN_VEC_INIT_V2SI,
28351 IX86_BUILTIN_VEC_INIT_V4HI,
28352 IX86_BUILTIN_VEC_INIT_V8QI,
28353 IX86_BUILTIN_VEC_EXT_V2DF,
28354 IX86_BUILTIN_VEC_EXT_V2DI,
28355 IX86_BUILTIN_VEC_EXT_V4SF,
28356 IX86_BUILTIN_VEC_EXT_V4SI,
28357 IX86_BUILTIN_VEC_EXT_V8HI,
28358 IX86_BUILTIN_VEC_EXT_V2SI,
28359 IX86_BUILTIN_VEC_EXT_V4HI,
28360 IX86_BUILTIN_VEC_EXT_V16QI,
28361 IX86_BUILTIN_VEC_SET_V2DI,
28362 IX86_BUILTIN_VEC_SET_V4SF,
28363 IX86_BUILTIN_VEC_SET_V4SI,
28364 IX86_BUILTIN_VEC_SET_V8HI,
28365 IX86_BUILTIN_VEC_SET_V4HI,
28366 IX86_BUILTIN_VEC_SET_V16QI,
28368 IX86_BUILTIN_VEC_PACK_SFIX,
28369 IX86_BUILTIN_VEC_PACK_SFIX256,
28371 /* SSE4.2. */
28372 IX86_BUILTIN_CRC32QI,
28373 IX86_BUILTIN_CRC32HI,
28374 IX86_BUILTIN_CRC32SI,
28375 IX86_BUILTIN_CRC32DI,
28377 IX86_BUILTIN_PCMPESTRI128,
28378 IX86_BUILTIN_PCMPESTRM128,
28379 IX86_BUILTIN_PCMPESTRA128,
28380 IX86_BUILTIN_PCMPESTRC128,
28381 IX86_BUILTIN_PCMPESTRO128,
28382 IX86_BUILTIN_PCMPESTRS128,
28383 IX86_BUILTIN_PCMPESTRZ128,
28384 IX86_BUILTIN_PCMPISTRI128,
28385 IX86_BUILTIN_PCMPISTRM128,
28386 IX86_BUILTIN_PCMPISTRA128,
28387 IX86_BUILTIN_PCMPISTRC128,
28388 IX86_BUILTIN_PCMPISTRO128,
28389 IX86_BUILTIN_PCMPISTRS128,
28390 IX86_BUILTIN_PCMPISTRZ128,
28392 IX86_BUILTIN_PCMPGTQ,
28394 /* AES instructions */
28395 IX86_BUILTIN_AESENC128,
28396 IX86_BUILTIN_AESENCLAST128,
28397 IX86_BUILTIN_AESDEC128,
28398 IX86_BUILTIN_AESDECLAST128,
28399 IX86_BUILTIN_AESIMC128,
28400 IX86_BUILTIN_AESKEYGENASSIST128,
28402 /* PCLMUL instruction */
28403 IX86_BUILTIN_PCLMULQDQ128,
28405 /* AVX */
28406 IX86_BUILTIN_ADDPD256,
28407 IX86_BUILTIN_ADDPS256,
28408 IX86_BUILTIN_ADDSUBPD256,
28409 IX86_BUILTIN_ADDSUBPS256,
28410 IX86_BUILTIN_ANDPD256,
28411 IX86_BUILTIN_ANDPS256,
28412 IX86_BUILTIN_ANDNPD256,
28413 IX86_BUILTIN_ANDNPS256,
28414 IX86_BUILTIN_BLENDPD256,
28415 IX86_BUILTIN_BLENDPS256,
28416 IX86_BUILTIN_BLENDVPD256,
28417 IX86_BUILTIN_BLENDVPS256,
28418 IX86_BUILTIN_DIVPD256,
28419 IX86_BUILTIN_DIVPS256,
28420 IX86_BUILTIN_DPPS256,
28421 IX86_BUILTIN_HADDPD256,
28422 IX86_BUILTIN_HADDPS256,
28423 IX86_BUILTIN_HSUBPD256,
28424 IX86_BUILTIN_HSUBPS256,
28425 IX86_BUILTIN_MAXPD256,
28426 IX86_BUILTIN_MAXPS256,
28427 IX86_BUILTIN_MINPD256,
28428 IX86_BUILTIN_MINPS256,
28429 IX86_BUILTIN_MULPD256,
28430 IX86_BUILTIN_MULPS256,
28431 IX86_BUILTIN_ORPD256,
28432 IX86_BUILTIN_ORPS256,
28433 IX86_BUILTIN_SHUFPD256,
28434 IX86_BUILTIN_SHUFPS256,
28435 IX86_BUILTIN_SUBPD256,
28436 IX86_BUILTIN_SUBPS256,
28437 IX86_BUILTIN_XORPD256,
28438 IX86_BUILTIN_XORPS256,
28439 IX86_BUILTIN_CMPSD,
28440 IX86_BUILTIN_CMPSS,
28441 IX86_BUILTIN_CMPPD,
28442 IX86_BUILTIN_CMPPS,
28443 IX86_BUILTIN_CMPPD256,
28444 IX86_BUILTIN_CMPPS256,
28445 IX86_BUILTIN_CVTDQ2PD256,
28446 IX86_BUILTIN_CVTDQ2PS256,
28447 IX86_BUILTIN_CVTPD2PS256,
28448 IX86_BUILTIN_CVTPS2DQ256,
28449 IX86_BUILTIN_CVTPS2PD256,
28450 IX86_BUILTIN_CVTTPD2DQ256,
28451 IX86_BUILTIN_CVTPD2DQ256,
28452 IX86_BUILTIN_CVTTPS2DQ256,
28453 IX86_BUILTIN_EXTRACTF128PD256,
28454 IX86_BUILTIN_EXTRACTF128PS256,
28455 IX86_BUILTIN_EXTRACTF128SI256,
28456 IX86_BUILTIN_VZEROALL,
28457 IX86_BUILTIN_VZEROUPPER,
28458 IX86_BUILTIN_VPERMILVARPD,
28459 IX86_BUILTIN_VPERMILVARPS,
28460 IX86_BUILTIN_VPERMILVARPD256,
28461 IX86_BUILTIN_VPERMILVARPS256,
28462 IX86_BUILTIN_VPERMILPD,
28463 IX86_BUILTIN_VPERMILPS,
28464 IX86_BUILTIN_VPERMILPD256,
28465 IX86_BUILTIN_VPERMILPS256,
28466 IX86_BUILTIN_VPERMIL2PD,
28467 IX86_BUILTIN_VPERMIL2PS,
28468 IX86_BUILTIN_VPERMIL2PD256,
28469 IX86_BUILTIN_VPERMIL2PS256,
28470 IX86_BUILTIN_VPERM2F128PD256,
28471 IX86_BUILTIN_VPERM2F128PS256,
28472 IX86_BUILTIN_VPERM2F128SI256,
28473 IX86_BUILTIN_VBROADCASTSS,
28474 IX86_BUILTIN_VBROADCASTSD256,
28475 IX86_BUILTIN_VBROADCASTSS256,
28476 IX86_BUILTIN_VBROADCASTPD256,
28477 IX86_BUILTIN_VBROADCASTPS256,
28478 IX86_BUILTIN_VINSERTF128PD256,
28479 IX86_BUILTIN_VINSERTF128PS256,
28480 IX86_BUILTIN_VINSERTF128SI256,
28481 IX86_BUILTIN_LOADUPD256,
28482 IX86_BUILTIN_LOADUPS256,
28483 IX86_BUILTIN_STOREUPD256,
28484 IX86_BUILTIN_STOREUPS256,
28485 IX86_BUILTIN_LDDQU256,
28486 IX86_BUILTIN_MOVNTDQ256,
28487 IX86_BUILTIN_MOVNTPD256,
28488 IX86_BUILTIN_MOVNTPS256,
28489 IX86_BUILTIN_LOADDQU256,
28490 IX86_BUILTIN_STOREDQU256,
28491 IX86_BUILTIN_MASKLOADPD,
28492 IX86_BUILTIN_MASKLOADPS,
28493 IX86_BUILTIN_MASKSTOREPD,
28494 IX86_BUILTIN_MASKSTOREPS,
28495 IX86_BUILTIN_MASKLOADPD256,
28496 IX86_BUILTIN_MASKLOADPS256,
28497 IX86_BUILTIN_MASKSTOREPD256,
28498 IX86_BUILTIN_MASKSTOREPS256,
28499 IX86_BUILTIN_MOVSHDUP256,
28500 IX86_BUILTIN_MOVSLDUP256,
28501 IX86_BUILTIN_MOVDDUP256,
28503 IX86_BUILTIN_SQRTPD256,
28504 IX86_BUILTIN_SQRTPS256,
28505 IX86_BUILTIN_SQRTPS_NR256,
28506 IX86_BUILTIN_RSQRTPS256,
28507 IX86_BUILTIN_RSQRTPS_NR256,
28509 IX86_BUILTIN_RCPPS256,
28511 IX86_BUILTIN_ROUNDPD256,
28512 IX86_BUILTIN_ROUNDPS256,
28514 IX86_BUILTIN_FLOORPD256,
28515 IX86_BUILTIN_CEILPD256,
28516 IX86_BUILTIN_TRUNCPD256,
28517 IX86_BUILTIN_RINTPD256,
28518 IX86_BUILTIN_ROUNDPD_AZ256,
28520 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28521 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28522 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28524 IX86_BUILTIN_FLOORPS256,
28525 IX86_BUILTIN_CEILPS256,
28526 IX86_BUILTIN_TRUNCPS256,
28527 IX86_BUILTIN_RINTPS256,
28528 IX86_BUILTIN_ROUNDPS_AZ256,
28530 IX86_BUILTIN_FLOORPS_SFIX256,
28531 IX86_BUILTIN_CEILPS_SFIX256,
28532 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28534 IX86_BUILTIN_UNPCKHPD256,
28535 IX86_BUILTIN_UNPCKLPD256,
28536 IX86_BUILTIN_UNPCKHPS256,
28537 IX86_BUILTIN_UNPCKLPS256,
28539 IX86_BUILTIN_SI256_SI,
28540 IX86_BUILTIN_PS256_PS,
28541 IX86_BUILTIN_PD256_PD,
28542 IX86_BUILTIN_SI_SI256,
28543 IX86_BUILTIN_PS_PS256,
28544 IX86_BUILTIN_PD_PD256,
28546 IX86_BUILTIN_VTESTZPD,
28547 IX86_BUILTIN_VTESTCPD,
28548 IX86_BUILTIN_VTESTNZCPD,
28549 IX86_BUILTIN_VTESTZPS,
28550 IX86_BUILTIN_VTESTCPS,
28551 IX86_BUILTIN_VTESTNZCPS,
28552 IX86_BUILTIN_VTESTZPD256,
28553 IX86_BUILTIN_VTESTCPD256,
28554 IX86_BUILTIN_VTESTNZCPD256,
28555 IX86_BUILTIN_VTESTZPS256,
28556 IX86_BUILTIN_VTESTCPS256,
28557 IX86_BUILTIN_VTESTNZCPS256,
28558 IX86_BUILTIN_PTESTZ256,
28559 IX86_BUILTIN_PTESTC256,
28560 IX86_BUILTIN_PTESTNZC256,
28562 IX86_BUILTIN_MOVMSKPD256,
28563 IX86_BUILTIN_MOVMSKPS256,
28565 /* AVX2 */
28566 IX86_BUILTIN_MPSADBW256,
28567 IX86_BUILTIN_PABSB256,
28568 IX86_BUILTIN_PABSW256,
28569 IX86_BUILTIN_PABSD256,
28570 IX86_BUILTIN_PACKSSDW256,
28571 IX86_BUILTIN_PACKSSWB256,
28572 IX86_BUILTIN_PACKUSDW256,
28573 IX86_BUILTIN_PACKUSWB256,
28574 IX86_BUILTIN_PADDB256,
28575 IX86_BUILTIN_PADDW256,
28576 IX86_BUILTIN_PADDD256,
28577 IX86_BUILTIN_PADDQ256,
28578 IX86_BUILTIN_PADDSB256,
28579 IX86_BUILTIN_PADDSW256,
28580 IX86_BUILTIN_PADDUSB256,
28581 IX86_BUILTIN_PADDUSW256,
28582 IX86_BUILTIN_PALIGNR256,
28583 IX86_BUILTIN_AND256I,
28584 IX86_BUILTIN_ANDNOT256I,
28585 IX86_BUILTIN_PAVGB256,
28586 IX86_BUILTIN_PAVGW256,
28587 IX86_BUILTIN_PBLENDVB256,
28588 IX86_BUILTIN_PBLENDVW256,
28589 IX86_BUILTIN_PCMPEQB256,
28590 IX86_BUILTIN_PCMPEQW256,
28591 IX86_BUILTIN_PCMPEQD256,
28592 IX86_BUILTIN_PCMPEQQ256,
28593 IX86_BUILTIN_PCMPGTB256,
28594 IX86_BUILTIN_PCMPGTW256,
28595 IX86_BUILTIN_PCMPGTD256,
28596 IX86_BUILTIN_PCMPGTQ256,
28597 IX86_BUILTIN_PHADDW256,
28598 IX86_BUILTIN_PHADDD256,
28599 IX86_BUILTIN_PHADDSW256,
28600 IX86_BUILTIN_PHSUBW256,
28601 IX86_BUILTIN_PHSUBD256,
28602 IX86_BUILTIN_PHSUBSW256,
28603 IX86_BUILTIN_PMADDUBSW256,
28604 IX86_BUILTIN_PMADDWD256,
28605 IX86_BUILTIN_PMAXSB256,
28606 IX86_BUILTIN_PMAXSW256,
28607 IX86_BUILTIN_PMAXSD256,
28608 IX86_BUILTIN_PMAXUB256,
28609 IX86_BUILTIN_PMAXUW256,
28610 IX86_BUILTIN_PMAXUD256,
28611 IX86_BUILTIN_PMINSB256,
28612 IX86_BUILTIN_PMINSW256,
28613 IX86_BUILTIN_PMINSD256,
28614 IX86_BUILTIN_PMINUB256,
28615 IX86_BUILTIN_PMINUW256,
28616 IX86_BUILTIN_PMINUD256,
28617 IX86_BUILTIN_PMOVMSKB256,
28618 IX86_BUILTIN_PMOVSXBW256,
28619 IX86_BUILTIN_PMOVSXBD256,
28620 IX86_BUILTIN_PMOVSXBQ256,
28621 IX86_BUILTIN_PMOVSXWD256,
28622 IX86_BUILTIN_PMOVSXWQ256,
28623 IX86_BUILTIN_PMOVSXDQ256,
28624 IX86_BUILTIN_PMOVZXBW256,
28625 IX86_BUILTIN_PMOVZXBD256,
28626 IX86_BUILTIN_PMOVZXBQ256,
28627 IX86_BUILTIN_PMOVZXWD256,
28628 IX86_BUILTIN_PMOVZXWQ256,
28629 IX86_BUILTIN_PMOVZXDQ256,
28630 IX86_BUILTIN_PMULDQ256,
28631 IX86_BUILTIN_PMULHRSW256,
28632 IX86_BUILTIN_PMULHUW256,
28633 IX86_BUILTIN_PMULHW256,
28634 IX86_BUILTIN_PMULLW256,
28635 IX86_BUILTIN_PMULLD256,
28636 IX86_BUILTIN_PMULUDQ256,
28637 IX86_BUILTIN_POR256,
28638 IX86_BUILTIN_PSADBW256,
28639 IX86_BUILTIN_PSHUFB256,
28640 IX86_BUILTIN_PSHUFD256,
28641 IX86_BUILTIN_PSHUFHW256,
28642 IX86_BUILTIN_PSHUFLW256,
28643 IX86_BUILTIN_PSIGNB256,
28644 IX86_BUILTIN_PSIGNW256,
28645 IX86_BUILTIN_PSIGND256,
28646 IX86_BUILTIN_PSLLDQI256,
28647 IX86_BUILTIN_PSLLWI256,
28648 IX86_BUILTIN_PSLLW256,
28649 IX86_BUILTIN_PSLLDI256,
28650 IX86_BUILTIN_PSLLD256,
28651 IX86_BUILTIN_PSLLQI256,
28652 IX86_BUILTIN_PSLLQ256,
28653 IX86_BUILTIN_PSRAWI256,
28654 IX86_BUILTIN_PSRAW256,
28655 IX86_BUILTIN_PSRADI256,
28656 IX86_BUILTIN_PSRAD256,
28657 IX86_BUILTIN_PSRLDQI256,
28658 IX86_BUILTIN_PSRLWI256,
28659 IX86_BUILTIN_PSRLW256,
28660 IX86_BUILTIN_PSRLDI256,
28661 IX86_BUILTIN_PSRLD256,
28662 IX86_BUILTIN_PSRLQI256,
28663 IX86_BUILTIN_PSRLQ256,
28664 IX86_BUILTIN_PSUBB256,
28665 IX86_BUILTIN_PSUBW256,
28666 IX86_BUILTIN_PSUBD256,
28667 IX86_BUILTIN_PSUBQ256,
28668 IX86_BUILTIN_PSUBSB256,
28669 IX86_BUILTIN_PSUBSW256,
28670 IX86_BUILTIN_PSUBUSB256,
28671 IX86_BUILTIN_PSUBUSW256,
28672 IX86_BUILTIN_PUNPCKHBW256,
28673 IX86_BUILTIN_PUNPCKHWD256,
28674 IX86_BUILTIN_PUNPCKHDQ256,
28675 IX86_BUILTIN_PUNPCKHQDQ256,
28676 IX86_BUILTIN_PUNPCKLBW256,
28677 IX86_BUILTIN_PUNPCKLWD256,
28678 IX86_BUILTIN_PUNPCKLDQ256,
28679 IX86_BUILTIN_PUNPCKLQDQ256,
28680 IX86_BUILTIN_PXOR256,
28681 IX86_BUILTIN_MOVNTDQA256,
28682 IX86_BUILTIN_VBROADCASTSS_PS,
28683 IX86_BUILTIN_VBROADCASTSS_PS256,
28684 IX86_BUILTIN_VBROADCASTSD_PD256,
28685 IX86_BUILTIN_VBROADCASTSI256,
28686 IX86_BUILTIN_PBLENDD256,
28687 IX86_BUILTIN_PBLENDD128,
28688 IX86_BUILTIN_PBROADCASTB256,
28689 IX86_BUILTIN_PBROADCASTW256,
28690 IX86_BUILTIN_PBROADCASTD256,
28691 IX86_BUILTIN_PBROADCASTQ256,
28692 IX86_BUILTIN_PBROADCASTB128,
28693 IX86_BUILTIN_PBROADCASTW128,
28694 IX86_BUILTIN_PBROADCASTD128,
28695 IX86_BUILTIN_PBROADCASTQ128,
28696 IX86_BUILTIN_VPERMVARSI256,
28697 IX86_BUILTIN_VPERMDF256,
28698 IX86_BUILTIN_VPERMVARSF256,
28699 IX86_BUILTIN_VPERMDI256,
28700 IX86_BUILTIN_VPERMTI256,
28701 IX86_BUILTIN_VEXTRACT128I256,
28702 IX86_BUILTIN_VINSERT128I256,
28703 IX86_BUILTIN_MASKLOADD,
28704 IX86_BUILTIN_MASKLOADQ,
28705 IX86_BUILTIN_MASKLOADD256,
28706 IX86_BUILTIN_MASKLOADQ256,
28707 IX86_BUILTIN_MASKSTORED,
28708 IX86_BUILTIN_MASKSTOREQ,
28709 IX86_BUILTIN_MASKSTORED256,
28710 IX86_BUILTIN_MASKSTOREQ256,
28711 IX86_BUILTIN_PSLLVV4DI,
28712 IX86_BUILTIN_PSLLVV2DI,
28713 IX86_BUILTIN_PSLLVV8SI,
28714 IX86_BUILTIN_PSLLVV4SI,
28715 IX86_BUILTIN_PSRAVV8SI,
28716 IX86_BUILTIN_PSRAVV4SI,
28717 IX86_BUILTIN_PSRLVV4DI,
28718 IX86_BUILTIN_PSRLVV2DI,
28719 IX86_BUILTIN_PSRLVV8SI,
28720 IX86_BUILTIN_PSRLVV4SI,
28722 IX86_BUILTIN_GATHERSIV2DF,
28723 IX86_BUILTIN_GATHERSIV4DF,
28724 IX86_BUILTIN_GATHERDIV2DF,
28725 IX86_BUILTIN_GATHERDIV4DF,
28726 IX86_BUILTIN_GATHERSIV4SF,
28727 IX86_BUILTIN_GATHERSIV8SF,
28728 IX86_BUILTIN_GATHERDIV4SF,
28729 IX86_BUILTIN_GATHERDIV8SF,
28730 IX86_BUILTIN_GATHERSIV2DI,
28731 IX86_BUILTIN_GATHERSIV4DI,
28732 IX86_BUILTIN_GATHERDIV2DI,
28733 IX86_BUILTIN_GATHERDIV4DI,
28734 IX86_BUILTIN_GATHERSIV4SI,
28735 IX86_BUILTIN_GATHERSIV8SI,
28736 IX86_BUILTIN_GATHERDIV4SI,
28737 IX86_BUILTIN_GATHERDIV8SI,
28739 /* AVX512F */
28740 IX86_BUILTIN_SI512_SI256,
28741 IX86_BUILTIN_PD512_PD256,
28742 IX86_BUILTIN_PS512_PS256,
28743 IX86_BUILTIN_SI512_SI,
28744 IX86_BUILTIN_PD512_PD,
28745 IX86_BUILTIN_PS512_PS,
28746 IX86_BUILTIN_ADDPD512,
28747 IX86_BUILTIN_ADDPS512,
28748 IX86_BUILTIN_ADDSD_ROUND,
28749 IX86_BUILTIN_ADDSS_ROUND,
28750 IX86_BUILTIN_ALIGND512,
28751 IX86_BUILTIN_ALIGNQ512,
28752 IX86_BUILTIN_BLENDMD512,
28753 IX86_BUILTIN_BLENDMPD512,
28754 IX86_BUILTIN_BLENDMPS512,
28755 IX86_BUILTIN_BLENDMQ512,
28756 IX86_BUILTIN_BROADCASTF32X4_512,
28757 IX86_BUILTIN_BROADCASTF64X4_512,
28758 IX86_BUILTIN_BROADCASTI32X4_512,
28759 IX86_BUILTIN_BROADCASTI64X4_512,
28760 IX86_BUILTIN_BROADCASTSD512,
28761 IX86_BUILTIN_BROADCASTSS512,
28762 IX86_BUILTIN_CMPD512,
28763 IX86_BUILTIN_CMPPD512,
28764 IX86_BUILTIN_CMPPS512,
28765 IX86_BUILTIN_CMPQ512,
28766 IX86_BUILTIN_CMPSD_MASK,
28767 IX86_BUILTIN_CMPSS_MASK,
28768 IX86_BUILTIN_COMIDF,
28769 IX86_BUILTIN_COMISF,
28770 IX86_BUILTIN_COMPRESSPD512,
28771 IX86_BUILTIN_COMPRESSPDSTORE512,
28772 IX86_BUILTIN_COMPRESSPS512,
28773 IX86_BUILTIN_COMPRESSPSSTORE512,
28774 IX86_BUILTIN_CVTDQ2PD512,
28775 IX86_BUILTIN_CVTDQ2PS512,
28776 IX86_BUILTIN_CVTPD2DQ512,
28777 IX86_BUILTIN_CVTPD2PS512,
28778 IX86_BUILTIN_CVTPD2UDQ512,
28779 IX86_BUILTIN_CVTPH2PS512,
28780 IX86_BUILTIN_CVTPS2DQ512,
28781 IX86_BUILTIN_CVTPS2PD512,
28782 IX86_BUILTIN_CVTPS2PH512,
28783 IX86_BUILTIN_CVTPS2UDQ512,
28784 IX86_BUILTIN_CVTSD2SS_ROUND,
28785 IX86_BUILTIN_CVTSI2SD64,
28786 IX86_BUILTIN_CVTSI2SS32,
28787 IX86_BUILTIN_CVTSI2SS64,
28788 IX86_BUILTIN_CVTSS2SD_ROUND,
28789 IX86_BUILTIN_CVTTPD2DQ512,
28790 IX86_BUILTIN_CVTTPD2UDQ512,
28791 IX86_BUILTIN_CVTTPS2DQ512,
28792 IX86_BUILTIN_CVTTPS2UDQ512,
28793 IX86_BUILTIN_CVTUDQ2PD512,
28794 IX86_BUILTIN_CVTUDQ2PS512,
28795 IX86_BUILTIN_CVTUSI2SD32,
28796 IX86_BUILTIN_CVTUSI2SD64,
28797 IX86_BUILTIN_CVTUSI2SS32,
28798 IX86_BUILTIN_CVTUSI2SS64,
28799 IX86_BUILTIN_DIVPD512,
28800 IX86_BUILTIN_DIVPS512,
28801 IX86_BUILTIN_DIVSD_ROUND,
28802 IX86_BUILTIN_DIVSS_ROUND,
28803 IX86_BUILTIN_EXPANDPD512,
28804 IX86_BUILTIN_EXPANDPD512Z,
28805 IX86_BUILTIN_EXPANDPDLOAD512,
28806 IX86_BUILTIN_EXPANDPDLOAD512Z,
28807 IX86_BUILTIN_EXPANDPS512,
28808 IX86_BUILTIN_EXPANDPS512Z,
28809 IX86_BUILTIN_EXPANDPSLOAD512,
28810 IX86_BUILTIN_EXPANDPSLOAD512Z,
28811 IX86_BUILTIN_EXTRACTF32X4,
28812 IX86_BUILTIN_EXTRACTF64X4,
28813 IX86_BUILTIN_EXTRACTI32X4,
28814 IX86_BUILTIN_EXTRACTI64X4,
28815 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28816 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28817 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28818 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28819 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28820 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28821 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28822 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28823 IX86_BUILTIN_GETEXPPD512,
28824 IX86_BUILTIN_GETEXPPS512,
28825 IX86_BUILTIN_GETEXPSD128,
28826 IX86_BUILTIN_GETEXPSS128,
28827 IX86_BUILTIN_GETMANTPD512,
28828 IX86_BUILTIN_GETMANTPS512,
28829 IX86_BUILTIN_GETMANTSD128,
28830 IX86_BUILTIN_GETMANTSS128,
28831 IX86_BUILTIN_INSERTF32X4,
28832 IX86_BUILTIN_INSERTF64X4,
28833 IX86_BUILTIN_INSERTI32X4,
28834 IX86_BUILTIN_INSERTI64X4,
28835 IX86_BUILTIN_LOADAPD512,
28836 IX86_BUILTIN_LOADAPS512,
28837 IX86_BUILTIN_LOADDQUDI512,
28838 IX86_BUILTIN_LOADDQUSI512,
28839 IX86_BUILTIN_LOADUPD512,
28840 IX86_BUILTIN_LOADUPS512,
28841 IX86_BUILTIN_MAXPD512,
28842 IX86_BUILTIN_MAXPS512,
28843 IX86_BUILTIN_MAXSD_ROUND,
28844 IX86_BUILTIN_MAXSS_ROUND,
28845 IX86_BUILTIN_MINPD512,
28846 IX86_BUILTIN_MINPS512,
28847 IX86_BUILTIN_MINSD_ROUND,
28848 IX86_BUILTIN_MINSS_ROUND,
28849 IX86_BUILTIN_MOVAPD512,
28850 IX86_BUILTIN_MOVAPS512,
28851 IX86_BUILTIN_MOVDDUP512,
28852 IX86_BUILTIN_MOVDQA32LOAD512,
28853 IX86_BUILTIN_MOVDQA32STORE512,
28854 IX86_BUILTIN_MOVDQA32_512,
28855 IX86_BUILTIN_MOVDQA64LOAD512,
28856 IX86_BUILTIN_MOVDQA64STORE512,
28857 IX86_BUILTIN_MOVDQA64_512,
28858 IX86_BUILTIN_MOVNTDQ512,
28859 IX86_BUILTIN_MOVNTDQA512,
28860 IX86_BUILTIN_MOVNTPD512,
28861 IX86_BUILTIN_MOVNTPS512,
28862 IX86_BUILTIN_MOVSHDUP512,
28863 IX86_BUILTIN_MOVSLDUP512,
28864 IX86_BUILTIN_MULPD512,
28865 IX86_BUILTIN_MULPS512,
28866 IX86_BUILTIN_MULSD_ROUND,
28867 IX86_BUILTIN_MULSS_ROUND,
28868 IX86_BUILTIN_PABSD512,
28869 IX86_BUILTIN_PABSQ512,
28870 IX86_BUILTIN_PADDD512,
28871 IX86_BUILTIN_PADDQ512,
28872 IX86_BUILTIN_PANDD512,
28873 IX86_BUILTIN_PANDND512,
28874 IX86_BUILTIN_PANDNQ512,
28875 IX86_BUILTIN_PANDQ512,
28876 IX86_BUILTIN_PBROADCASTD512,
28877 IX86_BUILTIN_PBROADCASTD512_GPR,
28878 IX86_BUILTIN_PBROADCASTMB512,
28879 IX86_BUILTIN_PBROADCASTMW512,
28880 IX86_BUILTIN_PBROADCASTQ512,
28881 IX86_BUILTIN_PBROADCASTQ512_GPR,
28882 IX86_BUILTIN_PCMPEQD512_MASK,
28883 IX86_BUILTIN_PCMPEQQ512_MASK,
28884 IX86_BUILTIN_PCMPGTD512_MASK,
28885 IX86_BUILTIN_PCMPGTQ512_MASK,
28886 IX86_BUILTIN_PCOMPRESSD512,
28887 IX86_BUILTIN_PCOMPRESSDSTORE512,
28888 IX86_BUILTIN_PCOMPRESSQ512,
28889 IX86_BUILTIN_PCOMPRESSQSTORE512,
28890 IX86_BUILTIN_PEXPANDD512,
28891 IX86_BUILTIN_PEXPANDD512Z,
28892 IX86_BUILTIN_PEXPANDDLOAD512,
28893 IX86_BUILTIN_PEXPANDDLOAD512Z,
28894 IX86_BUILTIN_PEXPANDQ512,
28895 IX86_BUILTIN_PEXPANDQ512Z,
28896 IX86_BUILTIN_PEXPANDQLOAD512,
28897 IX86_BUILTIN_PEXPANDQLOAD512Z,
28898 IX86_BUILTIN_PMAXSD512,
28899 IX86_BUILTIN_PMAXSQ512,
28900 IX86_BUILTIN_PMAXUD512,
28901 IX86_BUILTIN_PMAXUQ512,
28902 IX86_BUILTIN_PMINSD512,
28903 IX86_BUILTIN_PMINSQ512,
28904 IX86_BUILTIN_PMINUD512,
28905 IX86_BUILTIN_PMINUQ512,
28906 IX86_BUILTIN_PMOVDB512,
28907 IX86_BUILTIN_PMOVDB512_MEM,
28908 IX86_BUILTIN_PMOVDW512,
28909 IX86_BUILTIN_PMOVDW512_MEM,
28910 IX86_BUILTIN_PMOVQB512,
28911 IX86_BUILTIN_PMOVQB512_MEM,
28912 IX86_BUILTIN_PMOVQD512,
28913 IX86_BUILTIN_PMOVQD512_MEM,
28914 IX86_BUILTIN_PMOVQW512,
28915 IX86_BUILTIN_PMOVQW512_MEM,
28916 IX86_BUILTIN_PMOVSDB512,
28917 IX86_BUILTIN_PMOVSDB512_MEM,
28918 IX86_BUILTIN_PMOVSDW512,
28919 IX86_BUILTIN_PMOVSDW512_MEM,
28920 IX86_BUILTIN_PMOVSQB512,
28921 IX86_BUILTIN_PMOVSQB512_MEM,
28922 IX86_BUILTIN_PMOVSQD512,
28923 IX86_BUILTIN_PMOVSQD512_MEM,
28924 IX86_BUILTIN_PMOVSQW512,
28925 IX86_BUILTIN_PMOVSQW512_MEM,
28926 IX86_BUILTIN_PMOVSXBD512,
28927 IX86_BUILTIN_PMOVSXBQ512,
28928 IX86_BUILTIN_PMOVSXDQ512,
28929 IX86_BUILTIN_PMOVSXWD512,
28930 IX86_BUILTIN_PMOVSXWQ512,
28931 IX86_BUILTIN_PMOVUSDB512,
28932 IX86_BUILTIN_PMOVUSDB512_MEM,
28933 IX86_BUILTIN_PMOVUSDW512,
28934 IX86_BUILTIN_PMOVUSDW512_MEM,
28935 IX86_BUILTIN_PMOVUSQB512,
28936 IX86_BUILTIN_PMOVUSQB512_MEM,
28937 IX86_BUILTIN_PMOVUSQD512,
28938 IX86_BUILTIN_PMOVUSQD512_MEM,
28939 IX86_BUILTIN_PMOVUSQW512,
28940 IX86_BUILTIN_PMOVUSQW512_MEM,
28941 IX86_BUILTIN_PMOVZXBD512,
28942 IX86_BUILTIN_PMOVZXBQ512,
28943 IX86_BUILTIN_PMOVZXDQ512,
28944 IX86_BUILTIN_PMOVZXWD512,
28945 IX86_BUILTIN_PMOVZXWQ512,
28946 IX86_BUILTIN_PMULDQ512,
28947 IX86_BUILTIN_PMULLD512,
28948 IX86_BUILTIN_PMULUDQ512,
28949 IX86_BUILTIN_PORD512,
28950 IX86_BUILTIN_PORQ512,
28951 IX86_BUILTIN_PROLD512,
28952 IX86_BUILTIN_PROLQ512,
28953 IX86_BUILTIN_PROLVD512,
28954 IX86_BUILTIN_PROLVQ512,
28955 IX86_BUILTIN_PRORD512,
28956 IX86_BUILTIN_PRORQ512,
28957 IX86_BUILTIN_PRORVD512,
28958 IX86_BUILTIN_PRORVQ512,
28959 IX86_BUILTIN_PSHUFD512,
28960 IX86_BUILTIN_PSLLD512,
28961 IX86_BUILTIN_PSLLDI512,
28962 IX86_BUILTIN_PSLLQ512,
28963 IX86_BUILTIN_PSLLQI512,
28964 IX86_BUILTIN_PSLLVV16SI,
28965 IX86_BUILTIN_PSLLVV8DI,
28966 IX86_BUILTIN_PSRAD512,
28967 IX86_BUILTIN_PSRADI512,
28968 IX86_BUILTIN_PSRAQ512,
28969 IX86_BUILTIN_PSRAQI512,
28970 IX86_BUILTIN_PSRAVV16SI,
28971 IX86_BUILTIN_PSRAVV8DI,
28972 IX86_BUILTIN_PSRLD512,
28973 IX86_BUILTIN_PSRLDI512,
28974 IX86_BUILTIN_PSRLQ512,
28975 IX86_BUILTIN_PSRLQI512,
28976 IX86_BUILTIN_PSRLVV16SI,
28977 IX86_BUILTIN_PSRLVV8DI,
28978 IX86_BUILTIN_PSUBD512,
28979 IX86_BUILTIN_PSUBQ512,
28980 IX86_BUILTIN_PTESTMD512,
28981 IX86_BUILTIN_PTESTMQ512,
28982 IX86_BUILTIN_PTESTNMD512,
28983 IX86_BUILTIN_PTESTNMQ512,
28984 IX86_BUILTIN_PUNPCKHDQ512,
28985 IX86_BUILTIN_PUNPCKHQDQ512,
28986 IX86_BUILTIN_PUNPCKLDQ512,
28987 IX86_BUILTIN_PUNPCKLQDQ512,
28988 IX86_BUILTIN_PXORD512,
28989 IX86_BUILTIN_PXORQ512,
28990 IX86_BUILTIN_RCP14PD512,
28991 IX86_BUILTIN_RCP14PS512,
28992 IX86_BUILTIN_RCP14SD,
28993 IX86_BUILTIN_RCP14SS,
28994 IX86_BUILTIN_RNDSCALEPD,
28995 IX86_BUILTIN_RNDSCALEPS,
28996 IX86_BUILTIN_RNDSCALESD,
28997 IX86_BUILTIN_RNDSCALESS,
28998 IX86_BUILTIN_RSQRT14PD512,
28999 IX86_BUILTIN_RSQRT14PS512,
29000 IX86_BUILTIN_RSQRT14SD,
29001 IX86_BUILTIN_RSQRT14SS,
29002 IX86_BUILTIN_SCALEFPD512,
29003 IX86_BUILTIN_SCALEFPS512,
29004 IX86_BUILTIN_SCALEFSD,
29005 IX86_BUILTIN_SCALEFSS,
29006 IX86_BUILTIN_SHUFPD512,
29007 IX86_BUILTIN_SHUFPS512,
29008 IX86_BUILTIN_SHUF_F32x4,
29009 IX86_BUILTIN_SHUF_F64x2,
29010 IX86_BUILTIN_SHUF_I32x4,
29011 IX86_BUILTIN_SHUF_I64x2,
29012 IX86_BUILTIN_SQRTPD512,
29013 IX86_BUILTIN_SQRTPD512_MASK,
29014 IX86_BUILTIN_SQRTPS512_MASK,
29015 IX86_BUILTIN_SQRTPS_NR512,
29016 IX86_BUILTIN_SQRTSD_ROUND,
29017 IX86_BUILTIN_SQRTSS_ROUND,
29018 IX86_BUILTIN_STOREAPD512,
29019 IX86_BUILTIN_STOREAPS512,
29020 IX86_BUILTIN_STOREDQUDI512,
29021 IX86_BUILTIN_STOREDQUSI512,
29022 IX86_BUILTIN_STOREUPD512,
29023 IX86_BUILTIN_STOREUPS512,
29024 IX86_BUILTIN_SUBPD512,
29025 IX86_BUILTIN_SUBPS512,
29026 IX86_BUILTIN_SUBSD_ROUND,
29027 IX86_BUILTIN_SUBSS_ROUND,
29028 IX86_BUILTIN_UCMPD512,
29029 IX86_BUILTIN_UCMPQ512,
29030 IX86_BUILTIN_UNPCKHPD512,
29031 IX86_BUILTIN_UNPCKHPS512,
29032 IX86_BUILTIN_UNPCKLPD512,
29033 IX86_BUILTIN_UNPCKLPS512,
29034 IX86_BUILTIN_VCVTSD2SI32,
29035 IX86_BUILTIN_VCVTSD2SI64,
29036 IX86_BUILTIN_VCVTSD2USI32,
29037 IX86_BUILTIN_VCVTSD2USI64,
29038 IX86_BUILTIN_VCVTSS2SI32,
29039 IX86_BUILTIN_VCVTSS2SI64,
29040 IX86_BUILTIN_VCVTSS2USI32,
29041 IX86_BUILTIN_VCVTSS2USI64,
29042 IX86_BUILTIN_VCVTTSD2SI32,
29043 IX86_BUILTIN_VCVTTSD2SI64,
29044 IX86_BUILTIN_VCVTTSD2USI32,
29045 IX86_BUILTIN_VCVTTSD2USI64,
29046 IX86_BUILTIN_VCVTTSS2SI32,
29047 IX86_BUILTIN_VCVTTSS2SI64,
29048 IX86_BUILTIN_VCVTTSS2USI32,
29049 IX86_BUILTIN_VCVTTSS2USI64,
29050 IX86_BUILTIN_VFMADDPD512_MASK,
29051 IX86_BUILTIN_VFMADDPD512_MASK3,
29052 IX86_BUILTIN_VFMADDPD512_MASKZ,
29053 IX86_BUILTIN_VFMADDPS512_MASK,
29054 IX86_BUILTIN_VFMADDPS512_MASK3,
29055 IX86_BUILTIN_VFMADDPS512_MASKZ,
29056 IX86_BUILTIN_VFMADDSD3_ROUND,
29057 IX86_BUILTIN_VFMADDSS3_ROUND,
29058 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29059 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29060 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29061 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29062 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29063 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29064 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29065 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29066 IX86_BUILTIN_VFMSUBPD512_MASK3,
29067 IX86_BUILTIN_VFMSUBPS512_MASK3,
29068 IX86_BUILTIN_VFMSUBSD3_MASK3,
29069 IX86_BUILTIN_VFMSUBSS3_MASK3,
29070 IX86_BUILTIN_VFNMADDPD512_MASK,
29071 IX86_BUILTIN_VFNMADDPS512_MASK,
29072 IX86_BUILTIN_VFNMSUBPD512_MASK,
29073 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29074 IX86_BUILTIN_VFNMSUBPS512_MASK,
29075 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29076 IX86_BUILTIN_VPCLZCNTD512,
29077 IX86_BUILTIN_VPCLZCNTQ512,
29078 IX86_BUILTIN_VPCONFLICTD512,
29079 IX86_BUILTIN_VPCONFLICTQ512,
29080 IX86_BUILTIN_VPERMDF512,
29081 IX86_BUILTIN_VPERMDI512,
29082 IX86_BUILTIN_VPERMI2VARD512,
29083 IX86_BUILTIN_VPERMI2VARPD512,
29084 IX86_BUILTIN_VPERMI2VARPS512,
29085 IX86_BUILTIN_VPERMI2VARQ512,
29086 IX86_BUILTIN_VPERMILPD512,
29087 IX86_BUILTIN_VPERMILPS512,
29088 IX86_BUILTIN_VPERMILVARPD512,
29089 IX86_BUILTIN_VPERMILVARPS512,
29090 IX86_BUILTIN_VPERMT2VARD512,
29091 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29092 IX86_BUILTIN_VPERMT2VARPD512,
29093 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29094 IX86_BUILTIN_VPERMT2VARPS512,
29095 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29096 IX86_BUILTIN_VPERMT2VARQ512,
29097 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29098 IX86_BUILTIN_VPERMVARDF512,
29099 IX86_BUILTIN_VPERMVARDI512,
29100 IX86_BUILTIN_VPERMVARSF512,
29101 IX86_BUILTIN_VPERMVARSI512,
29102 IX86_BUILTIN_VTERNLOGD512_MASK,
29103 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29104 IX86_BUILTIN_VTERNLOGQ512_MASK,
29105 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29107 /* Mask arithmetic operations */
29108 IX86_BUILTIN_KAND16,
29109 IX86_BUILTIN_KANDN16,
29110 IX86_BUILTIN_KNOT16,
29111 IX86_BUILTIN_KOR16,
29112 IX86_BUILTIN_KORTESTC16,
29113 IX86_BUILTIN_KORTESTZ16,
29114 IX86_BUILTIN_KUNPCKBW,
29115 IX86_BUILTIN_KXNOR16,
29116 IX86_BUILTIN_KXOR16,
29117 IX86_BUILTIN_KMOV16,
29119 /* AVX512VL. */
29120 IX86_BUILTIN_PMOVUSQD256_MEM,
29121 IX86_BUILTIN_PMOVUSQD128_MEM,
29122 IX86_BUILTIN_PMOVSQD256_MEM,
29123 IX86_BUILTIN_PMOVSQD128_MEM,
29124 IX86_BUILTIN_PMOVQD256_MEM,
29125 IX86_BUILTIN_PMOVQD128_MEM,
29126 IX86_BUILTIN_PMOVUSQW256_MEM,
29127 IX86_BUILTIN_PMOVUSQW128_MEM,
29128 IX86_BUILTIN_PMOVSQW256_MEM,
29129 IX86_BUILTIN_PMOVSQW128_MEM,
29130 IX86_BUILTIN_PMOVQW256_MEM,
29131 IX86_BUILTIN_PMOVQW128_MEM,
29132 IX86_BUILTIN_PMOVUSQB256_MEM,
29133 IX86_BUILTIN_PMOVUSQB128_MEM,
29134 IX86_BUILTIN_PMOVSQB256_MEM,
29135 IX86_BUILTIN_PMOVSQB128_MEM,
29136 IX86_BUILTIN_PMOVQB256_MEM,
29137 IX86_BUILTIN_PMOVQB128_MEM,
29138 IX86_BUILTIN_PMOVUSDW256_MEM,
29139 IX86_BUILTIN_PMOVUSDW128_MEM,
29140 IX86_BUILTIN_PMOVSDW256_MEM,
29141 IX86_BUILTIN_PMOVSDW128_MEM,
29142 IX86_BUILTIN_PMOVDW256_MEM,
29143 IX86_BUILTIN_PMOVDW128_MEM,
29144 IX86_BUILTIN_PMOVUSDB256_MEM,
29145 IX86_BUILTIN_PMOVUSDB128_MEM,
29146 IX86_BUILTIN_PMOVSDB256_MEM,
29147 IX86_BUILTIN_PMOVSDB128_MEM,
29148 IX86_BUILTIN_PMOVDB256_MEM,
29149 IX86_BUILTIN_PMOVDB128_MEM,
29150 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29151 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29152 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29153 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29154 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29155 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29156 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29157 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29158 IX86_BUILTIN_LOADAPD256_MASK,
29159 IX86_BUILTIN_LOADAPD128_MASK,
29160 IX86_BUILTIN_LOADAPS256_MASK,
29161 IX86_BUILTIN_LOADAPS128_MASK,
29162 IX86_BUILTIN_STOREAPD256_MASK,
29163 IX86_BUILTIN_STOREAPD128_MASK,
29164 IX86_BUILTIN_STOREAPS256_MASK,
29165 IX86_BUILTIN_STOREAPS128_MASK,
29166 IX86_BUILTIN_LOADUPD256_MASK,
29167 IX86_BUILTIN_LOADUPD128_MASK,
29168 IX86_BUILTIN_LOADUPS256_MASK,
29169 IX86_BUILTIN_LOADUPS128_MASK,
29170 IX86_BUILTIN_STOREUPD256_MASK,
29171 IX86_BUILTIN_STOREUPD128_MASK,
29172 IX86_BUILTIN_STOREUPS256_MASK,
29173 IX86_BUILTIN_STOREUPS128_MASK,
29174 IX86_BUILTIN_LOADDQUDI256_MASK,
29175 IX86_BUILTIN_LOADDQUDI128_MASK,
29176 IX86_BUILTIN_LOADDQUSI256_MASK,
29177 IX86_BUILTIN_LOADDQUSI128_MASK,
29178 IX86_BUILTIN_LOADDQUHI256_MASK,
29179 IX86_BUILTIN_LOADDQUHI128_MASK,
29180 IX86_BUILTIN_LOADDQUQI256_MASK,
29181 IX86_BUILTIN_LOADDQUQI128_MASK,
29182 IX86_BUILTIN_STOREDQUDI256_MASK,
29183 IX86_BUILTIN_STOREDQUDI128_MASK,
29184 IX86_BUILTIN_STOREDQUSI256_MASK,
29185 IX86_BUILTIN_STOREDQUSI128_MASK,
29186 IX86_BUILTIN_STOREDQUHI256_MASK,
29187 IX86_BUILTIN_STOREDQUHI128_MASK,
29188 IX86_BUILTIN_STOREDQUQI256_MASK,
29189 IX86_BUILTIN_STOREDQUQI128_MASK,
29190 IX86_BUILTIN_COMPRESSPDSTORE256,
29191 IX86_BUILTIN_COMPRESSPDSTORE128,
29192 IX86_BUILTIN_COMPRESSPSSTORE256,
29193 IX86_BUILTIN_COMPRESSPSSTORE128,
29194 IX86_BUILTIN_PCOMPRESSQSTORE256,
29195 IX86_BUILTIN_PCOMPRESSQSTORE128,
29196 IX86_BUILTIN_PCOMPRESSDSTORE256,
29197 IX86_BUILTIN_PCOMPRESSDSTORE128,
29198 IX86_BUILTIN_EXPANDPDLOAD256,
29199 IX86_BUILTIN_EXPANDPDLOAD128,
29200 IX86_BUILTIN_EXPANDPSLOAD256,
29201 IX86_BUILTIN_EXPANDPSLOAD128,
29202 IX86_BUILTIN_PEXPANDQLOAD256,
29203 IX86_BUILTIN_PEXPANDQLOAD128,
29204 IX86_BUILTIN_PEXPANDDLOAD256,
29205 IX86_BUILTIN_PEXPANDDLOAD128,
29206 IX86_BUILTIN_EXPANDPDLOAD256Z,
29207 IX86_BUILTIN_EXPANDPDLOAD128Z,
29208 IX86_BUILTIN_EXPANDPSLOAD256Z,
29209 IX86_BUILTIN_EXPANDPSLOAD128Z,
29210 IX86_BUILTIN_PEXPANDQLOAD256Z,
29211 IX86_BUILTIN_PEXPANDQLOAD128Z,
29212 IX86_BUILTIN_PEXPANDDLOAD256Z,
29213 IX86_BUILTIN_PEXPANDDLOAD128Z,
29214 IX86_BUILTIN_PALIGNR256_MASK,
29215 IX86_BUILTIN_PALIGNR128_MASK,
29216 IX86_BUILTIN_MOVDQA64_256_MASK,
29217 IX86_BUILTIN_MOVDQA64_128_MASK,
29218 IX86_BUILTIN_MOVDQA32_256_MASK,
29219 IX86_BUILTIN_MOVDQA32_128_MASK,
29220 IX86_BUILTIN_MOVAPD256_MASK,
29221 IX86_BUILTIN_MOVAPD128_MASK,
29222 IX86_BUILTIN_MOVAPS256_MASK,
29223 IX86_BUILTIN_MOVAPS128_MASK,
29224 IX86_BUILTIN_MOVDQUHI256_MASK,
29225 IX86_BUILTIN_MOVDQUHI128_MASK,
29226 IX86_BUILTIN_MOVDQUQI256_MASK,
29227 IX86_BUILTIN_MOVDQUQI128_MASK,
29228 IX86_BUILTIN_MINPS128_MASK,
29229 IX86_BUILTIN_MAXPS128_MASK,
29230 IX86_BUILTIN_MINPD128_MASK,
29231 IX86_BUILTIN_MAXPD128_MASK,
29232 IX86_BUILTIN_MAXPD256_MASK,
29233 IX86_BUILTIN_MAXPS256_MASK,
29234 IX86_BUILTIN_MINPD256_MASK,
29235 IX86_BUILTIN_MINPS256_MASK,
29236 IX86_BUILTIN_MULPS128_MASK,
29237 IX86_BUILTIN_DIVPS128_MASK,
29238 IX86_BUILTIN_MULPD128_MASK,
29239 IX86_BUILTIN_DIVPD128_MASK,
29240 IX86_BUILTIN_DIVPD256_MASK,
29241 IX86_BUILTIN_DIVPS256_MASK,
29242 IX86_BUILTIN_MULPD256_MASK,
29243 IX86_BUILTIN_MULPS256_MASK,
29244 IX86_BUILTIN_ADDPD128_MASK,
29245 IX86_BUILTIN_ADDPD256_MASK,
29246 IX86_BUILTIN_ADDPS128_MASK,
29247 IX86_BUILTIN_ADDPS256_MASK,
29248 IX86_BUILTIN_SUBPD128_MASK,
29249 IX86_BUILTIN_SUBPD256_MASK,
29250 IX86_BUILTIN_SUBPS128_MASK,
29251 IX86_BUILTIN_SUBPS256_MASK,
29252 IX86_BUILTIN_XORPD256_MASK,
29253 IX86_BUILTIN_XORPD128_MASK,
29254 IX86_BUILTIN_XORPS256_MASK,
29255 IX86_BUILTIN_XORPS128_MASK,
29256 IX86_BUILTIN_ORPD256_MASK,
29257 IX86_BUILTIN_ORPD128_MASK,
29258 IX86_BUILTIN_ORPS256_MASK,
29259 IX86_BUILTIN_ORPS128_MASK,
29260 IX86_BUILTIN_BROADCASTF32x2_256,
29261 IX86_BUILTIN_BROADCASTI32x2_256,
29262 IX86_BUILTIN_BROADCASTI32x2_128,
29263 IX86_BUILTIN_BROADCASTF64X2_256,
29264 IX86_BUILTIN_BROADCASTI64X2_256,
29265 IX86_BUILTIN_BROADCASTF32X4_256,
29266 IX86_BUILTIN_BROADCASTI32X4_256,
29267 IX86_BUILTIN_EXTRACTF32X4_256,
29268 IX86_BUILTIN_EXTRACTI32X4_256,
29269 IX86_BUILTIN_DBPSADBW256,
29270 IX86_BUILTIN_DBPSADBW128,
29271 IX86_BUILTIN_CVTTPD2QQ256,
29272 IX86_BUILTIN_CVTTPD2QQ128,
29273 IX86_BUILTIN_CVTTPD2UQQ256,
29274 IX86_BUILTIN_CVTTPD2UQQ128,
29275 IX86_BUILTIN_CVTPD2QQ256,
29276 IX86_BUILTIN_CVTPD2QQ128,
29277 IX86_BUILTIN_CVTPD2UQQ256,
29278 IX86_BUILTIN_CVTPD2UQQ128,
29279 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29280 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29281 IX86_BUILTIN_CVTTPS2QQ256,
29282 IX86_BUILTIN_CVTTPS2QQ128,
29283 IX86_BUILTIN_CVTTPS2UQQ256,
29284 IX86_BUILTIN_CVTTPS2UQQ128,
29285 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29286 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29287 IX86_BUILTIN_CVTTPS2UDQ256,
29288 IX86_BUILTIN_CVTTPS2UDQ128,
29289 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29290 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29291 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29292 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29293 IX86_BUILTIN_CVTPD2DQ256_MASK,
29294 IX86_BUILTIN_CVTPD2DQ128_MASK,
29295 IX86_BUILTIN_CVTDQ2PD256_MASK,
29296 IX86_BUILTIN_CVTDQ2PD128_MASK,
29297 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29298 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29299 IX86_BUILTIN_CVTDQ2PS256_MASK,
29300 IX86_BUILTIN_CVTDQ2PS128_MASK,
29301 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29302 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29303 IX86_BUILTIN_CVTPS2PD256_MASK,
29304 IX86_BUILTIN_CVTPS2PD128_MASK,
29305 IX86_BUILTIN_PBROADCASTB256_MASK,
29306 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29307 IX86_BUILTIN_PBROADCASTB128_MASK,
29308 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29309 IX86_BUILTIN_PBROADCASTW256_MASK,
29310 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29311 IX86_BUILTIN_PBROADCASTW128_MASK,
29312 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29313 IX86_BUILTIN_PBROADCASTD256_MASK,
29314 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29315 IX86_BUILTIN_PBROADCASTD128_MASK,
29316 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29317 IX86_BUILTIN_PBROADCASTQ256_MASK,
29318 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29319 IX86_BUILTIN_PBROADCASTQ128_MASK,
29320 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29321 IX86_BUILTIN_BROADCASTSS256,
29322 IX86_BUILTIN_BROADCASTSS128,
29323 IX86_BUILTIN_BROADCASTSD256,
29324 IX86_BUILTIN_EXTRACTF64X2_256,
29325 IX86_BUILTIN_EXTRACTI64X2_256,
29326 IX86_BUILTIN_INSERTF32X4_256,
29327 IX86_BUILTIN_INSERTI32X4_256,
29328 IX86_BUILTIN_PMOVSXBW256_MASK,
29329 IX86_BUILTIN_PMOVSXBW128_MASK,
29330 IX86_BUILTIN_PMOVSXBD256_MASK,
29331 IX86_BUILTIN_PMOVSXBD128_MASK,
29332 IX86_BUILTIN_PMOVSXBQ256_MASK,
29333 IX86_BUILTIN_PMOVSXBQ128_MASK,
29334 IX86_BUILTIN_PMOVSXWD256_MASK,
29335 IX86_BUILTIN_PMOVSXWD128_MASK,
29336 IX86_BUILTIN_PMOVSXWQ256_MASK,
29337 IX86_BUILTIN_PMOVSXWQ128_MASK,
29338 IX86_BUILTIN_PMOVSXDQ256_MASK,
29339 IX86_BUILTIN_PMOVSXDQ128_MASK,
29340 IX86_BUILTIN_PMOVZXBW256_MASK,
29341 IX86_BUILTIN_PMOVZXBW128_MASK,
29342 IX86_BUILTIN_PMOVZXBD256_MASK,
29343 IX86_BUILTIN_PMOVZXBD128_MASK,
29344 IX86_BUILTIN_PMOVZXBQ256_MASK,
29345 IX86_BUILTIN_PMOVZXBQ128_MASK,
29346 IX86_BUILTIN_PMOVZXWD256_MASK,
29347 IX86_BUILTIN_PMOVZXWD128_MASK,
29348 IX86_BUILTIN_PMOVZXWQ256_MASK,
29349 IX86_BUILTIN_PMOVZXWQ128_MASK,
29350 IX86_BUILTIN_PMOVZXDQ256_MASK,
29351 IX86_BUILTIN_PMOVZXDQ128_MASK,
29352 IX86_BUILTIN_REDUCEPD256_MASK,
29353 IX86_BUILTIN_REDUCEPD128_MASK,
29354 IX86_BUILTIN_REDUCEPS256_MASK,
29355 IX86_BUILTIN_REDUCEPS128_MASK,
29356 IX86_BUILTIN_REDUCESD_MASK,
29357 IX86_BUILTIN_REDUCESS_MASK,
29358 IX86_BUILTIN_VPERMVARHI256_MASK,
29359 IX86_BUILTIN_VPERMVARHI128_MASK,
29360 IX86_BUILTIN_VPERMT2VARHI256,
29361 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29362 IX86_BUILTIN_VPERMT2VARHI128,
29363 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29364 IX86_BUILTIN_VPERMI2VARHI256,
29365 IX86_BUILTIN_VPERMI2VARHI128,
29366 IX86_BUILTIN_RCP14PD256,
29367 IX86_BUILTIN_RCP14PD128,
29368 IX86_BUILTIN_RCP14PS256,
29369 IX86_BUILTIN_RCP14PS128,
29370 IX86_BUILTIN_RSQRT14PD256_MASK,
29371 IX86_BUILTIN_RSQRT14PD128_MASK,
29372 IX86_BUILTIN_RSQRT14PS256_MASK,
29373 IX86_BUILTIN_RSQRT14PS128_MASK,
29374 IX86_BUILTIN_SQRTPD256_MASK,
29375 IX86_BUILTIN_SQRTPD128_MASK,
29376 IX86_BUILTIN_SQRTPS256_MASK,
29377 IX86_BUILTIN_SQRTPS128_MASK,
29378 IX86_BUILTIN_PADDB128_MASK,
29379 IX86_BUILTIN_PADDW128_MASK,
29380 IX86_BUILTIN_PADDD128_MASK,
29381 IX86_BUILTIN_PADDQ128_MASK,
29382 IX86_BUILTIN_PSUBB128_MASK,
29383 IX86_BUILTIN_PSUBW128_MASK,
29384 IX86_BUILTIN_PSUBD128_MASK,
29385 IX86_BUILTIN_PSUBQ128_MASK,
29386 IX86_BUILTIN_PADDSB128_MASK,
29387 IX86_BUILTIN_PADDSW128_MASK,
29388 IX86_BUILTIN_PSUBSB128_MASK,
29389 IX86_BUILTIN_PSUBSW128_MASK,
29390 IX86_BUILTIN_PADDUSB128_MASK,
29391 IX86_BUILTIN_PADDUSW128_MASK,
29392 IX86_BUILTIN_PSUBUSB128_MASK,
29393 IX86_BUILTIN_PSUBUSW128_MASK,
29394 IX86_BUILTIN_PADDB256_MASK,
29395 IX86_BUILTIN_PADDW256_MASK,
29396 IX86_BUILTIN_PADDD256_MASK,
29397 IX86_BUILTIN_PADDQ256_MASK,
29398 IX86_BUILTIN_PADDSB256_MASK,
29399 IX86_BUILTIN_PADDSW256_MASK,
29400 IX86_BUILTIN_PADDUSB256_MASK,
29401 IX86_BUILTIN_PADDUSW256_MASK,
29402 IX86_BUILTIN_PSUBB256_MASK,
29403 IX86_BUILTIN_PSUBW256_MASK,
29404 IX86_BUILTIN_PSUBD256_MASK,
29405 IX86_BUILTIN_PSUBQ256_MASK,
29406 IX86_BUILTIN_PSUBSB256_MASK,
29407 IX86_BUILTIN_PSUBSW256_MASK,
29408 IX86_BUILTIN_PSUBUSB256_MASK,
29409 IX86_BUILTIN_PSUBUSW256_MASK,
29410 IX86_BUILTIN_SHUF_F64x2_256,
29411 IX86_BUILTIN_SHUF_I64x2_256,
29412 IX86_BUILTIN_SHUF_I32x4_256,
29413 IX86_BUILTIN_SHUF_F32x4_256,
29414 IX86_BUILTIN_PMOVWB128,
29415 IX86_BUILTIN_PMOVWB256,
29416 IX86_BUILTIN_PMOVSWB128,
29417 IX86_BUILTIN_PMOVSWB256,
29418 IX86_BUILTIN_PMOVUSWB128,
29419 IX86_BUILTIN_PMOVUSWB256,
29420 IX86_BUILTIN_PMOVDB128,
29421 IX86_BUILTIN_PMOVDB256,
29422 IX86_BUILTIN_PMOVSDB128,
29423 IX86_BUILTIN_PMOVSDB256,
29424 IX86_BUILTIN_PMOVUSDB128,
29425 IX86_BUILTIN_PMOVUSDB256,
29426 IX86_BUILTIN_PMOVDW128,
29427 IX86_BUILTIN_PMOVDW256,
29428 IX86_BUILTIN_PMOVSDW128,
29429 IX86_BUILTIN_PMOVSDW256,
29430 IX86_BUILTIN_PMOVUSDW128,
29431 IX86_BUILTIN_PMOVUSDW256,
29432 IX86_BUILTIN_PMOVQB128,
29433 IX86_BUILTIN_PMOVQB256,
29434 IX86_BUILTIN_PMOVSQB128,
29435 IX86_BUILTIN_PMOVSQB256,
29436 IX86_BUILTIN_PMOVUSQB128,
29437 IX86_BUILTIN_PMOVUSQB256,
29438 IX86_BUILTIN_PMOVQW128,
29439 IX86_BUILTIN_PMOVQW256,
29440 IX86_BUILTIN_PMOVSQW128,
29441 IX86_BUILTIN_PMOVSQW256,
29442 IX86_BUILTIN_PMOVUSQW128,
29443 IX86_BUILTIN_PMOVUSQW256,
29444 IX86_BUILTIN_PMOVQD128,
29445 IX86_BUILTIN_PMOVQD256,
29446 IX86_BUILTIN_PMOVSQD128,
29447 IX86_BUILTIN_PMOVSQD256,
29448 IX86_BUILTIN_PMOVUSQD128,
29449 IX86_BUILTIN_PMOVUSQD256,
29450 IX86_BUILTIN_RANGEPD256,
29451 IX86_BUILTIN_RANGEPD128,
29452 IX86_BUILTIN_RANGEPS256,
29453 IX86_BUILTIN_RANGEPS128,
29454 IX86_BUILTIN_GETEXPPS256,
29455 IX86_BUILTIN_GETEXPPD256,
29456 IX86_BUILTIN_GETEXPPS128,
29457 IX86_BUILTIN_GETEXPPD128,
29458 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29459 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29460 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29461 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29462 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29463 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29464 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29465 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29466 IX86_BUILTIN_PABSQ256,
29467 IX86_BUILTIN_PABSQ128,
29468 IX86_BUILTIN_PABSD256_MASK,
29469 IX86_BUILTIN_PABSD128_MASK,
29470 IX86_BUILTIN_PMULHRSW256_MASK,
29471 IX86_BUILTIN_PMULHRSW128_MASK,
29472 IX86_BUILTIN_PMULHUW128_MASK,
29473 IX86_BUILTIN_PMULHUW256_MASK,
29474 IX86_BUILTIN_PMULHW256_MASK,
29475 IX86_BUILTIN_PMULHW128_MASK,
29476 IX86_BUILTIN_PMULLW256_MASK,
29477 IX86_BUILTIN_PMULLW128_MASK,
29478 IX86_BUILTIN_PMULLQ256,
29479 IX86_BUILTIN_PMULLQ128,
29480 IX86_BUILTIN_ANDPD256_MASK,
29481 IX86_BUILTIN_ANDPD128_MASK,
29482 IX86_BUILTIN_ANDPS256_MASK,
29483 IX86_BUILTIN_ANDPS128_MASK,
29484 IX86_BUILTIN_ANDNPD256_MASK,
29485 IX86_BUILTIN_ANDNPD128_MASK,
29486 IX86_BUILTIN_ANDNPS256_MASK,
29487 IX86_BUILTIN_ANDNPS128_MASK,
29488 IX86_BUILTIN_PSLLWI128_MASK,
29489 IX86_BUILTIN_PSLLDI128_MASK,
29490 IX86_BUILTIN_PSLLQI128_MASK,
29491 IX86_BUILTIN_PSLLW128_MASK,
29492 IX86_BUILTIN_PSLLD128_MASK,
29493 IX86_BUILTIN_PSLLQ128_MASK,
29494 IX86_BUILTIN_PSLLWI256_MASK,
29495 IX86_BUILTIN_PSLLW256_MASK,
29496 IX86_BUILTIN_PSLLDI256_MASK,
29497 IX86_BUILTIN_PSLLD256_MASK,
29498 IX86_BUILTIN_PSLLQI256_MASK,
29499 IX86_BUILTIN_PSLLQ256_MASK,
29500 IX86_BUILTIN_PSRADI128_MASK,
29501 IX86_BUILTIN_PSRAD128_MASK,
29502 IX86_BUILTIN_PSRADI256_MASK,
29503 IX86_BUILTIN_PSRAD256_MASK,
29504 IX86_BUILTIN_PSRAQI128_MASK,
29505 IX86_BUILTIN_PSRAQ128_MASK,
29506 IX86_BUILTIN_PSRAQI256_MASK,
29507 IX86_BUILTIN_PSRAQ256_MASK,
29508 IX86_BUILTIN_PANDD256,
29509 IX86_BUILTIN_PANDD128,
29510 IX86_BUILTIN_PSRLDI128_MASK,
29511 IX86_BUILTIN_PSRLD128_MASK,
29512 IX86_BUILTIN_PSRLDI256_MASK,
29513 IX86_BUILTIN_PSRLD256_MASK,
29514 IX86_BUILTIN_PSRLQI128_MASK,
29515 IX86_BUILTIN_PSRLQ128_MASK,
29516 IX86_BUILTIN_PSRLQI256_MASK,
29517 IX86_BUILTIN_PSRLQ256_MASK,
29518 IX86_BUILTIN_PANDQ256,
29519 IX86_BUILTIN_PANDQ128,
29520 IX86_BUILTIN_PANDND256,
29521 IX86_BUILTIN_PANDND128,
29522 IX86_BUILTIN_PANDNQ256,
29523 IX86_BUILTIN_PANDNQ128,
29524 IX86_BUILTIN_PORD256,
29525 IX86_BUILTIN_PORD128,
29526 IX86_BUILTIN_PORQ256,
29527 IX86_BUILTIN_PORQ128,
29528 IX86_BUILTIN_PXORD256,
29529 IX86_BUILTIN_PXORD128,
29530 IX86_BUILTIN_PXORQ256,
29531 IX86_BUILTIN_PXORQ128,
29532 IX86_BUILTIN_PACKSSWB256_MASK,
29533 IX86_BUILTIN_PACKSSWB128_MASK,
29534 IX86_BUILTIN_PACKUSWB256_MASK,
29535 IX86_BUILTIN_PACKUSWB128_MASK,
29536 IX86_BUILTIN_RNDSCALEPS256,
29537 IX86_BUILTIN_RNDSCALEPD256,
29538 IX86_BUILTIN_RNDSCALEPS128,
29539 IX86_BUILTIN_RNDSCALEPD128,
29540 IX86_BUILTIN_VTERNLOGQ256_MASK,
29541 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29542 IX86_BUILTIN_VTERNLOGD256_MASK,
29543 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29544 IX86_BUILTIN_VTERNLOGQ128_MASK,
29545 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29546 IX86_BUILTIN_VTERNLOGD128_MASK,
29547 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29548 IX86_BUILTIN_SCALEFPD256,
29549 IX86_BUILTIN_SCALEFPS256,
29550 IX86_BUILTIN_SCALEFPD128,
29551 IX86_BUILTIN_SCALEFPS128,
29552 IX86_BUILTIN_VFMADDPD256_MASK,
29553 IX86_BUILTIN_VFMADDPD256_MASK3,
29554 IX86_BUILTIN_VFMADDPD256_MASKZ,
29555 IX86_BUILTIN_VFMADDPD128_MASK,
29556 IX86_BUILTIN_VFMADDPD128_MASK3,
29557 IX86_BUILTIN_VFMADDPD128_MASKZ,
29558 IX86_BUILTIN_VFMADDPS256_MASK,
29559 IX86_BUILTIN_VFMADDPS256_MASK3,
29560 IX86_BUILTIN_VFMADDPS256_MASKZ,
29561 IX86_BUILTIN_VFMADDPS128_MASK,
29562 IX86_BUILTIN_VFMADDPS128_MASK3,
29563 IX86_BUILTIN_VFMADDPS128_MASKZ,
29564 IX86_BUILTIN_VFMSUBPD256_MASK3,
29565 IX86_BUILTIN_VFMSUBPD128_MASK3,
29566 IX86_BUILTIN_VFMSUBPS256_MASK3,
29567 IX86_BUILTIN_VFMSUBPS128_MASK3,
29568 IX86_BUILTIN_VFNMADDPD256_MASK,
29569 IX86_BUILTIN_VFNMADDPD128_MASK,
29570 IX86_BUILTIN_VFNMADDPS256_MASK,
29571 IX86_BUILTIN_VFNMADDPS128_MASK,
29572 IX86_BUILTIN_VFNMSUBPD256_MASK,
29573 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29574 IX86_BUILTIN_VFNMSUBPD128_MASK,
29575 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29576 IX86_BUILTIN_VFNMSUBPS256_MASK,
29577 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29578 IX86_BUILTIN_VFNMSUBPS128_MASK,
29579 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29580 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29581 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29582 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29583 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29584 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29585 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29586 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29587 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29588 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29589 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29590 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29591 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29592 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29593 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29594 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29595 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29596 IX86_BUILTIN_INSERTF64X2_256,
29597 IX86_BUILTIN_INSERTI64X2_256,
29598 IX86_BUILTIN_PSRAVV16HI,
29599 IX86_BUILTIN_PSRAVV8HI,
29600 IX86_BUILTIN_PMADDUBSW256_MASK,
29601 IX86_BUILTIN_PMADDUBSW128_MASK,
29602 IX86_BUILTIN_PMADDWD256_MASK,
29603 IX86_BUILTIN_PMADDWD128_MASK,
29604 IX86_BUILTIN_PSRLVV16HI,
29605 IX86_BUILTIN_PSRLVV8HI,
29606 IX86_BUILTIN_CVTPS2DQ256_MASK,
29607 IX86_BUILTIN_CVTPS2DQ128_MASK,
29608 IX86_BUILTIN_CVTPS2UDQ256,
29609 IX86_BUILTIN_CVTPS2UDQ128,
29610 IX86_BUILTIN_CVTPS2QQ256,
29611 IX86_BUILTIN_CVTPS2QQ128,
29612 IX86_BUILTIN_CVTPS2UQQ256,
29613 IX86_BUILTIN_CVTPS2UQQ128,
29614 IX86_BUILTIN_GETMANTPS256,
29615 IX86_BUILTIN_GETMANTPS128,
29616 IX86_BUILTIN_GETMANTPD256,
29617 IX86_BUILTIN_GETMANTPD128,
29618 IX86_BUILTIN_MOVDDUP256_MASK,
29619 IX86_BUILTIN_MOVDDUP128_MASK,
29620 IX86_BUILTIN_MOVSHDUP256_MASK,
29621 IX86_BUILTIN_MOVSHDUP128_MASK,
29622 IX86_BUILTIN_MOVSLDUP256_MASK,
29623 IX86_BUILTIN_MOVSLDUP128_MASK,
29624 IX86_BUILTIN_CVTQQ2PS256,
29625 IX86_BUILTIN_CVTQQ2PS128,
29626 IX86_BUILTIN_CVTUQQ2PS256,
29627 IX86_BUILTIN_CVTUQQ2PS128,
29628 IX86_BUILTIN_CVTQQ2PD256,
29629 IX86_BUILTIN_CVTQQ2PD128,
29630 IX86_BUILTIN_CVTUQQ2PD256,
29631 IX86_BUILTIN_CVTUQQ2PD128,
29632 IX86_BUILTIN_VPERMT2VARQ256,
29633 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29634 IX86_BUILTIN_VPERMT2VARD256,
29635 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29636 IX86_BUILTIN_VPERMI2VARQ256,
29637 IX86_BUILTIN_VPERMI2VARD256,
29638 IX86_BUILTIN_VPERMT2VARPD256,
29639 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29640 IX86_BUILTIN_VPERMT2VARPS256,
29641 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29642 IX86_BUILTIN_VPERMI2VARPD256,
29643 IX86_BUILTIN_VPERMI2VARPS256,
29644 IX86_BUILTIN_VPERMT2VARQ128,
29645 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29646 IX86_BUILTIN_VPERMT2VARD128,
29647 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29648 IX86_BUILTIN_VPERMI2VARQ128,
29649 IX86_BUILTIN_VPERMI2VARD128,
29650 IX86_BUILTIN_VPERMT2VARPD128,
29651 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29652 IX86_BUILTIN_VPERMT2VARPS128,
29653 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29654 IX86_BUILTIN_VPERMI2VARPD128,
29655 IX86_BUILTIN_VPERMI2VARPS128,
29656 IX86_BUILTIN_PSHUFB256_MASK,
29657 IX86_BUILTIN_PSHUFB128_MASK,
29658 IX86_BUILTIN_PSHUFHW256_MASK,
29659 IX86_BUILTIN_PSHUFHW128_MASK,
29660 IX86_BUILTIN_PSHUFLW256_MASK,
29661 IX86_BUILTIN_PSHUFLW128_MASK,
29662 IX86_BUILTIN_PSHUFD256_MASK,
29663 IX86_BUILTIN_PSHUFD128_MASK,
29664 IX86_BUILTIN_SHUFPD256_MASK,
29665 IX86_BUILTIN_SHUFPD128_MASK,
29666 IX86_BUILTIN_SHUFPS256_MASK,
29667 IX86_BUILTIN_SHUFPS128_MASK,
29668 IX86_BUILTIN_PROLVQ256,
29669 IX86_BUILTIN_PROLVQ128,
29670 IX86_BUILTIN_PROLQ256,
29671 IX86_BUILTIN_PROLQ128,
29672 IX86_BUILTIN_PRORVQ256,
29673 IX86_BUILTIN_PRORVQ128,
29674 IX86_BUILTIN_PRORQ256,
29675 IX86_BUILTIN_PRORQ128,
29676 IX86_BUILTIN_PSRAVQ128,
29677 IX86_BUILTIN_PSRAVQ256,
29678 IX86_BUILTIN_PSLLVV4DI_MASK,
29679 IX86_BUILTIN_PSLLVV2DI_MASK,
29680 IX86_BUILTIN_PSLLVV8SI_MASK,
29681 IX86_BUILTIN_PSLLVV4SI_MASK,
29682 IX86_BUILTIN_PSRAVV8SI_MASK,
29683 IX86_BUILTIN_PSRAVV4SI_MASK,
29684 IX86_BUILTIN_PSRLVV4DI_MASK,
29685 IX86_BUILTIN_PSRLVV2DI_MASK,
29686 IX86_BUILTIN_PSRLVV8SI_MASK,
29687 IX86_BUILTIN_PSRLVV4SI_MASK,
29688 IX86_BUILTIN_PSRAWI256_MASK,
29689 IX86_BUILTIN_PSRAW256_MASK,
29690 IX86_BUILTIN_PSRAWI128_MASK,
29691 IX86_BUILTIN_PSRAW128_MASK,
29692 IX86_BUILTIN_PSRLWI256_MASK,
29693 IX86_BUILTIN_PSRLW256_MASK,
29694 IX86_BUILTIN_PSRLWI128_MASK,
29695 IX86_BUILTIN_PSRLW128_MASK,
29696 IX86_BUILTIN_PRORVD256,
29697 IX86_BUILTIN_PROLVD256,
29698 IX86_BUILTIN_PRORD256,
29699 IX86_BUILTIN_PROLD256,
29700 IX86_BUILTIN_PRORVD128,
29701 IX86_BUILTIN_PROLVD128,
29702 IX86_BUILTIN_PRORD128,
29703 IX86_BUILTIN_PROLD128,
29704 IX86_BUILTIN_FPCLASSPD256,
29705 IX86_BUILTIN_FPCLASSPD128,
29706 IX86_BUILTIN_FPCLASSSD,
29707 IX86_BUILTIN_FPCLASSPS256,
29708 IX86_BUILTIN_FPCLASSPS128,
29709 IX86_BUILTIN_FPCLASSSS,
29710 IX86_BUILTIN_CVTB2MASK128,
29711 IX86_BUILTIN_CVTB2MASK256,
29712 IX86_BUILTIN_CVTW2MASK128,
29713 IX86_BUILTIN_CVTW2MASK256,
29714 IX86_BUILTIN_CVTD2MASK128,
29715 IX86_BUILTIN_CVTD2MASK256,
29716 IX86_BUILTIN_CVTQ2MASK128,
29717 IX86_BUILTIN_CVTQ2MASK256,
29718 IX86_BUILTIN_CVTMASK2B128,
29719 IX86_BUILTIN_CVTMASK2B256,
29720 IX86_BUILTIN_CVTMASK2W128,
29721 IX86_BUILTIN_CVTMASK2W256,
29722 IX86_BUILTIN_CVTMASK2D128,
29723 IX86_BUILTIN_CVTMASK2D256,
29724 IX86_BUILTIN_CVTMASK2Q128,
29725 IX86_BUILTIN_CVTMASK2Q256,
29726 IX86_BUILTIN_PCMPEQB128_MASK,
29727 IX86_BUILTIN_PCMPEQB256_MASK,
29728 IX86_BUILTIN_PCMPEQW128_MASK,
29729 IX86_BUILTIN_PCMPEQW256_MASK,
29730 IX86_BUILTIN_PCMPEQD128_MASK,
29731 IX86_BUILTIN_PCMPEQD256_MASK,
29732 IX86_BUILTIN_PCMPEQQ128_MASK,
29733 IX86_BUILTIN_PCMPEQQ256_MASK,
29734 IX86_BUILTIN_PCMPGTB128_MASK,
29735 IX86_BUILTIN_PCMPGTB256_MASK,
29736 IX86_BUILTIN_PCMPGTW128_MASK,
29737 IX86_BUILTIN_PCMPGTW256_MASK,
29738 IX86_BUILTIN_PCMPGTD128_MASK,
29739 IX86_BUILTIN_PCMPGTD256_MASK,
29740 IX86_BUILTIN_PCMPGTQ128_MASK,
29741 IX86_BUILTIN_PCMPGTQ256_MASK,
29742 IX86_BUILTIN_PTESTMB128,
29743 IX86_BUILTIN_PTESTMB256,
29744 IX86_BUILTIN_PTESTMW128,
29745 IX86_BUILTIN_PTESTMW256,
29746 IX86_BUILTIN_PTESTMD128,
29747 IX86_BUILTIN_PTESTMD256,
29748 IX86_BUILTIN_PTESTMQ128,
29749 IX86_BUILTIN_PTESTMQ256,
29750 IX86_BUILTIN_PTESTNMB128,
29751 IX86_BUILTIN_PTESTNMB256,
29752 IX86_BUILTIN_PTESTNMW128,
29753 IX86_BUILTIN_PTESTNMW256,
29754 IX86_BUILTIN_PTESTNMD128,
29755 IX86_BUILTIN_PTESTNMD256,
29756 IX86_BUILTIN_PTESTNMQ128,
29757 IX86_BUILTIN_PTESTNMQ256,
29758 IX86_BUILTIN_PBROADCASTMB128,
29759 IX86_BUILTIN_PBROADCASTMB256,
29760 IX86_BUILTIN_PBROADCASTMW128,
29761 IX86_BUILTIN_PBROADCASTMW256,
29762 IX86_BUILTIN_COMPRESSPD256,
29763 IX86_BUILTIN_COMPRESSPD128,
29764 IX86_BUILTIN_COMPRESSPS256,
29765 IX86_BUILTIN_COMPRESSPS128,
29766 IX86_BUILTIN_PCOMPRESSQ256,
29767 IX86_BUILTIN_PCOMPRESSQ128,
29768 IX86_BUILTIN_PCOMPRESSD256,
29769 IX86_BUILTIN_PCOMPRESSD128,
29770 IX86_BUILTIN_EXPANDPD256,
29771 IX86_BUILTIN_EXPANDPD128,
29772 IX86_BUILTIN_EXPANDPS256,
29773 IX86_BUILTIN_EXPANDPS128,
29774 IX86_BUILTIN_PEXPANDQ256,
29775 IX86_BUILTIN_PEXPANDQ128,
29776 IX86_BUILTIN_PEXPANDD256,
29777 IX86_BUILTIN_PEXPANDD128,
29778 IX86_BUILTIN_EXPANDPD256Z,
29779 IX86_BUILTIN_EXPANDPD128Z,
29780 IX86_BUILTIN_EXPANDPS256Z,
29781 IX86_BUILTIN_EXPANDPS128Z,
29782 IX86_BUILTIN_PEXPANDQ256Z,
29783 IX86_BUILTIN_PEXPANDQ128Z,
29784 IX86_BUILTIN_PEXPANDD256Z,
29785 IX86_BUILTIN_PEXPANDD128Z,
29786 IX86_BUILTIN_PMAXSD256_MASK,
29787 IX86_BUILTIN_PMINSD256_MASK,
29788 IX86_BUILTIN_PMAXUD256_MASK,
29789 IX86_BUILTIN_PMINUD256_MASK,
29790 IX86_BUILTIN_PMAXSD128_MASK,
29791 IX86_BUILTIN_PMINSD128_MASK,
29792 IX86_BUILTIN_PMAXUD128_MASK,
29793 IX86_BUILTIN_PMINUD128_MASK,
29794 IX86_BUILTIN_PMAXSQ256_MASK,
29795 IX86_BUILTIN_PMINSQ256_MASK,
29796 IX86_BUILTIN_PMAXUQ256_MASK,
29797 IX86_BUILTIN_PMINUQ256_MASK,
29798 IX86_BUILTIN_PMAXSQ128_MASK,
29799 IX86_BUILTIN_PMINSQ128_MASK,
29800 IX86_BUILTIN_PMAXUQ128_MASK,
29801 IX86_BUILTIN_PMINUQ128_MASK,
29802 IX86_BUILTIN_PMINSB256_MASK,
29803 IX86_BUILTIN_PMINUB256_MASK,
29804 IX86_BUILTIN_PMAXSB256_MASK,
29805 IX86_BUILTIN_PMAXUB256_MASK,
29806 IX86_BUILTIN_PMINSB128_MASK,
29807 IX86_BUILTIN_PMINUB128_MASK,
29808 IX86_BUILTIN_PMAXSB128_MASK,
29809 IX86_BUILTIN_PMAXUB128_MASK,
29810 IX86_BUILTIN_PMINSW256_MASK,
29811 IX86_BUILTIN_PMINUW256_MASK,
29812 IX86_BUILTIN_PMAXSW256_MASK,
29813 IX86_BUILTIN_PMAXUW256_MASK,
29814 IX86_BUILTIN_PMINSW128_MASK,
29815 IX86_BUILTIN_PMINUW128_MASK,
29816 IX86_BUILTIN_PMAXSW128_MASK,
29817 IX86_BUILTIN_PMAXUW128_MASK,
29818 IX86_BUILTIN_VPCONFLICTQ256,
29819 IX86_BUILTIN_VPCONFLICTD256,
29820 IX86_BUILTIN_VPCLZCNTQ256,
29821 IX86_BUILTIN_VPCLZCNTD256,
29822 IX86_BUILTIN_UNPCKHPD256_MASK,
29823 IX86_BUILTIN_UNPCKHPD128_MASK,
29824 IX86_BUILTIN_UNPCKHPS256_MASK,
29825 IX86_BUILTIN_UNPCKHPS128_MASK,
29826 IX86_BUILTIN_UNPCKLPD256_MASK,
29827 IX86_BUILTIN_UNPCKLPD128_MASK,
29828 IX86_BUILTIN_UNPCKLPS256_MASK,
29829 IX86_BUILTIN_VPCONFLICTQ128,
29830 IX86_BUILTIN_VPCONFLICTD128,
29831 IX86_BUILTIN_VPCLZCNTQ128,
29832 IX86_BUILTIN_VPCLZCNTD128,
29833 IX86_BUILTIN_UNPCKLPS128_MASK,
29834 IX86_BUILTIN_ALIGND256,
29835 IX86_BUILTIN_ALIGNQ256,
29836 IX86_BUILTIN_ALIGND128,
29837 IX86_BUILTIN_ALIGNQ128,
29838 IX86_BUILTIN_CVTPS2PH256_MASK,
29839 IX86_BUILTIN_CVTPS2PH_MASK,
29840 IX86_BUILTIN_CVTPH2PS_MASK,
29841 IX86_BUILTIN_CVTPH2PS256_MASK,
29842 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29843 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29844 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29845 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29846 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29847 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29848 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29849 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29850 IX86_BUILTIN_PUNPCKHBW128_MASK,
29851 IX86_BUILTIN_PUNPCKHBW256_MASK,
29852 IX86_BUILTIN_PUNPCKHWD128_MASK,
29853 IX86_BUILTIN_PUNPCKHWD256_MASK,
29854 IX86_BUILTIN_PUNPCKLBW128_MASK,
29855 IX86_BUILTIN_PUNPCKLBW256_MASK,
29856 IX86_BUILTIN_PUNPCKLWD128_MASK,
29857 IX86_BUILTIN_PUNPCKLWD256_MASK,
29858 IX86_BUILTIN_PSLLVV16HI,
29859 IX86_BUILTIN_PSLLVV8HI,
29860 IX86_BUILTIN_PACKSSDW256_MASK,
29861 IX86_BUILTIN_PACKSSDW128_MASK,
29862 IX86_BUILTIN_PACKUSDW256_MASK,
29863 IX86_BUILTIN_PACKUSDW128_MASK,
29864 IX86_BUILTIN_PAVGB256_MASK,
29865 IX86_BUILTIN_PAVGW256_MASK,
29866 IX86_BUILTIN_PAVGB128_MASK,
29867 IX86_BUILTIN_PAVGW128_MASK,
29868 IX86_BUILTIN_VPERMVARSF256_MASK,
29869 IX86_BUILTIN_VPERMVARDF256_MASK,
29870 IX86_BUILTIN_VPERMDF256_MASK,
29871 IX86_BUILTIN_PABSB256_MASK,
29872 IX86_BUILTIN_PABSB128_MASK,
29873 IX86_BUILTIN_PABSW256_MASK,
29874 IX86_BUILTIN_PABSW128_MASK,
29875 IX86_BUILTIN_VPERMILVARPD_MASK,
29876 IX86_BUILTIN_VPERMILVARPS_MASK,
29877 IX86_BUILTIN_VPERMILVARPD256_MASK,
29878 IX86_BUILTIN_VPERMILVARPS256_MASK,
29879 IX86_BUILTIN_VPERMILPD_MASK,
29880 IX86_BUILTIN_VPERMILPS_MASK,
29881 IX86_BUILTIN_VPERMILPD256_MASK,
29882 IX86_BUILTIN_VPERMILPS256_MASK,
29883 IX86_BUILTIN_BLENDMQ256,
29884 IX86_BUILTIN_BLENDMD256,
29885 IX86_BUILTIN_BLENDMPD256,
29886 IX86_BUILTIN_BLENDMPS256,
29887 IX86_BUILTIN_BLENDMQ128,
29888 IX86_BUILTIN_BLENDMD128,
29889 IX86_BUILTIN_BLENDMPD128,
29890 IX86_BUILTIN_BLENDMPS128,
29891 IX86_BUILTIN_BLENDMW256,
29892 IX86_BUILTIN_BLENDMB256,
29893 IX86_BUILTIN_BLENDMW128,
29894 IX86_BUILTIN_BLENDMB128,
29895 IX86_BUILTIN_PMULLD256_MASK,
29896 IX86_BUILTIN_PMULLD128_MASK,
29897 IX86_BUILTIN_PMULUDQ256_MASK,
29898 IX86_BUILTIN_PMULDQ256_MASK,
29899 IX86_BUILTIN_PMULDQ128_MASK,
29900 IX86_BUILTIN_PMULUDQ128_MASK,
29901 IX86_BUILTIN_CVTPD2PS256_MASK,
29902 IX86_BUILTIN_CVTPD2PS_MASK,
29903 IX86_BUILTIN_VPERMVARSI256_MASK,
29904 IX86_BUILTIN_VPERMVARDI256_MASK,
29905 IX86_BUILTIN_VPERMDI256_MASK,
29906 IX86_BUILTIN_CMPQ256,
29907 IX86_BUILTIN_CMPD256,
29908 IX86_BUILTIN_UCMPQ256,
29909 IX86_BUILTIN_UCMPD256,
29910 IX86_BUILTIN_CMPB256,
29911 IX86_BUILTIN_CMPW256,
29912 IX86_BUILTIN_UCMPB256,
29913 IX86_BUILTIN_UCMPW256,
29914 IX86_BUILTIN_CMPPD256_MASK,
29915 IX86_BUILTIN_CMPPS256_MASK,
29916 IX86_BUILTIN_CMPQ128,
29917 IX86_BUILTIN_CMPD128,
29918 IX86_BUILTIN_UCMPQ128,
29919 IX86_BUILTIN_UCMPD128,
29920 IX86_BUILTIN_CMPB128,
29921 IX86_BUILTIN_CMPW128,
29922 IX86_BUILTIN_UCMPB128,
29923 IX86_BUILTIN_UCMPW128,
29924 IX86_BUILTIN_CMPPD128_MASK,
29925 IX86_BUILTIN_CMPPS128_MASK,
29927 IX86_BUILTIN_GATHER3SIV8SF,
29928 IX86_BUILTIN_GATHER3SIV4SF,
29929 IX86_BUILTIN_GATHER3SIV4DF,
29930 IX86_BUILTIN_GATHER3SIV2DF,
29931 IX86_BUILTIN_GATHER3DIV8SF,
29932 IX86_BUILTIN_GATHER3DIV4SF,
29933 IX86_BUILTIN_GATHER3DIV4DF,
29934 IX86_BUILTIN_GATHER3DIV2DF,
29935 IX86_BUILTIN_GATHER3SIV8SI,
29936 IX86_BUILTIN_GATHER3SIV4SI,
29937 IX86_BUILTIN_GATHER3SIV4DI,
29938 IX86_BUILTIN_GATHER3SIV2DI,
29939 IX86_BUILTIN_GATHER3DIV8SI,
29940 IX86_BUILTIN_GATHER3DIV4SI,
29941 IX86_BUILTIN_GATHER3DIV4DI,
29942 IX86_BUILTIN_GATHER3DIV2DI,
29943 IX86_BUILTIN_SCATTERSIV8SF,
29944 IX86_BUILTIN_SCATTERSIV4SF,
29945 IX86_BUILTIN_SCATTERSIV4DF,
29946 IX86_BUILTIN_SCATTERSIV2DF,
29947 IX86_BUILTIN_SCATTERDIV8SF,
29948 IX86_BUILTIN_SCATTERDIV4SF,
29949 IX86_BUILTIN_SCATTERDIV4DF,
29950 IX86_BUILTIN_SCATTERDIV2DF,
29951 IX86_BUILTIN_SCATTERSIV8SI,
29952 IX86_BUILTIN_SCATTERSIV4SI,
29953 IX86_BUILTIN_SCATTERSIV4DI,
29954 IX86_BUILTIN_SCATTERSIV2DI,
29955 IX86_BUILTIN_SCATTERDIV8SI,
29956 IX86_BUILTIN_SCATTERDIV4SI,
29957 IX86_BUILTIN_SCATTERDIV4DI,
29958 IX86_BUILTIN_SCATTERDIV2DI,
29960 /* AVX512DQ. */
29961 IX86_BUILTIN_RANGESD128,
29962 IX86_BUILTIN_RANGESS128,
29963 IX86_BUILTIN_KUNPCKWD,
29964 IX86_BUILTIN_KUNPCKDQ,
29965 IX86_BUILTIN_BROADCASTF32x2_512,
29966 IX86_BUILTIN_BROADCASTI32x2_512,
29967 IX86_BUILTIN_BROADCASTF64X2_512,
29968 IX86_BUILTIN_BROADCASTI64X2_512,
29969 IX86_BUILTIN_BROADCASTF32X8_512,
29970 IX86_BUILTIN_BROADCASTI32X8_512,
29971 IX86_BUILTIN_EXTRACTF64X2_512,
29972 IX86_BUILTIN_EXTRACTF32X8,
29973 IX86_BUILTIN_EXTRACTI64X2_512,
29974 IX86_BUILTIN_EXTRACTI32X8,
29975 IX86_BUILTIN_REDUCEPD512_MASK,
29976 IX86_BUILTIN_REDUCEPS512_MASK,
29977 IX86_BUILTIN_PMULLQ512,
29978 IX86_BUILTIN_XORPD512,
29979 IX86_BUILTIN_XORPS512,
29980 IX86_BUILTIN_ORPD512,
29981 IX86_BUILTIN_ORPS512,
29982 IX86_BUILTIN_ANDPD512,
29983 IX86_BUILTIN_ANDPS512,
29984 IX86_BUILTIN_ANDNPD512,
29985 IX86_BUILTIN_ANDNPS512,
29986 IX86_BUILTIN_INSERTF32X8,
29987 IX86_BUILTIN_INSERTI32X8,
29988 IX86_BUILTIN_INSERTF64X2_512,
29989 IX86_BUILTIN_INSERTI64X2_512,
29990 IX86_BUILTIN_FPCLASSPD512,
29991 IX86_BUILTIN_FPCLASSPS512,
29992 IX86_BUILTIN_CVTD2MASK512,
29993 IX86_BUILTIN_CVTQ2MASK512,
29994 IX86_BUILTIN_CVTMASK2D512,
29995 IX86_BUILTIN_CVTMASK2Q512,
29996 IX86_BUILTIN_CVTPD2QQ512,
29997 IX86_BUILTIN_CVTPS2QQ512,
29998 IX86_BUILTIN_CVTPD2UQQ512,
29999 IX86_BUILTIN_CVTPS2UQQ512,
30000 IX86_BUILTIN_CVTQQ2PS512,
30001 IX86_BUILTIN_CVTUQQ2PS512,
30002 IX86_BUILTIN_CVTQQ2PD512,
30003 IX86_BUILTIN_CVTUQQ2PD512,
30004 IX86_BUILTIN_CVTTPS2QQ512,
30005 IX86_BUILTIN_CVTTPS2UQQ512,
30006 IX86_BUILTIN_CVTTPD2QQ512,
30007 IX86_BUILTIN_CVTTPD2UQQ512,
30008 IX86_BUILTIN_RANGEPS512,
30009 IX86_BUILTIN_RANGEPD512,
30011 /* AVX512BW. */
30012 IX86_BUILTIN_PACKUSDW512,
30013 IX86_BUILTIN_PACKSSDW512,
30014 IX86_BUILTIN_LOADDQUHI512_MASK,
30015 IX86_BUILTIN_LOADDQUQI512_MASK,
30016 IX86_BUILTIN_PSLLDQ512,
30017 IX86_BUILTIN_PSRLDQ512,
30018 IX86_BUILTIN_STOREDQUHI512_MASK,
30019 IX86_BUILTIN_STOREDQUQI512_MASK,
30020 IX86_BUILTIN_PALIGNR512,
30021 IX86_BUILTIN_PALIGNR512_MASK,
30022 IX86_BUILTIN_MOVDQUHI512_MASK,
30023 IX86_BUILTIN_MOVDQUQI512_MASK,
30024 IX86_BUILTIN_PSADBW512,
30025 IX86_BUILTIN_DBPSADBW512,
30026 IX86_BUILTIN_PBROADCASTB512,
30027 IX86_BUILTIN_PBROADCASTB512_GPR,
30028 IX86_BUILTIN_PBROADCASTW512,
30029 IX86_BUILTIN_PBROADCASTW512_GPR,
30030 IX86_BUILTIN_PMOVSXBW512_MASK,
30031 IX86_BUILTIN_PMOVZXBW512_MASK,
30032 IX86_BUILTIN_VPERMVARHI512_MASK,
30033 IX86_BUILTIN_VPERMT2VARHI512,
30034 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30035 IX86_BUILTIN_VPERMI2VARHI512,
30036 IX86_BUILTIN_PAVGB512,
30037 IX86_BUILTIN_PAVGW512,
30038 IX86_BUILTIN_PADDB512,
30039 IX86_BUILTIN_PSUBB512,
30040 IX86_BUILTIN_PSUBSB512,
30041 IX86_BUILTIN_PADDSB512,
30042 IX86_BUILTIN_PSUBUSB512,
30043 IX86_BUILTIN_PADDUSB512,
30044 IX86_BUILTIN_PSUBW512,
30045 IX86_BUILTIN_PADDW512,
30046 IX86_BUILTIN_PSUBSW512,
30047 IX86_BUILTIN_PADDSW512,
30048 IX86_BUILTIN_PSUBUSW512,
30049 IX86_BUILTIN_PADDUSW512,
30050 IX86_BUILTIN_PMAXUW512,
30051 IX86_BUILTIN_PMAXSW512,
30052 IX86_BUILTIN_PMINUW512,
30053 IX86_BUILTIN_PMINSW512,
30054 IX86_BUILTIN_PMAXUB512,
30055 IX86_BUILTIN_PMAXSB512,
30056 IX86_BUILTIN_PMINUB512,
30057 IX86_BUILTIN_PMINSB512,
30058 IX86_BUILTIN_PMOVWB512,
30059 IX86_BUILTIN_PMOVSWB512,
30060 IX86_BUILTIN_PMOVUSWB512,
30061 IX86_BUILTIN_PMULHRSW512_MASK,
30062 IX86_BUILTIN_PMULHUW512_MASK,
30063 IX86_BUILTIN_PMULHW512_MASK,
30064 IX86_BUILTIN_PMULLW512_MASK,
30065 IX86_BUILTIN_PSLLWI512_MASK,
30066 IX86_BUILTIN_PSLLW512_MASK,
30067 IX86_BUILTIN_PACKSSWB512,
30068 IX86_BUILTIN_PACKUSWB512,
30069 IX86_BUILTIN_PSRAVV32HI,
30070 IX86_BUILTIN_PMADDUBSW512_MASK,
30071 IX86_BUILTIN_PMADDWD512_MASK,
30072 IX86_BUILTIN_PSRLVV32HI,
30073 IX86_BUILTIN_PUNPCKHBW512,
30074 IX86_BUILTIN_PUNPCKHWD512,
30075 IX86_BUILTIN_PUNPCKLBW512,
30076 IX86_BUILTIN_PUNPCKLWD512,
30077 IX86_BUILTIN_PSHUFB512,
30078 IX86_BUILTIN_PSHUFHW512,
30079 IX86_BUILTIN_PSHUFLW512,
30080 IX86_BUILTIN_PSRAWI512,
30081 IX86_BUILTIN_PSRAW512,
30082 IX86_BUILTIN_PSRLWI512,
30083 IX86_BUILTIN_PSRLW512,
30084 IX86_BUILTIN_CVTB2MASK512,
30085 IX86_BUILTIN_CVTW2MASK512,
30086 IX86_BUILTIN_CVTMASK2B512,
30087 IX86_BUILTIN_CVTMASK2W512,
30088 IX86_BUILTIN_PCMPEQB512_MASK,
30089 IX86_BUILTIN_PCMPEQW512_MASK,
30090 IX86_BUILTIN_PCMPGTB512_MASK,
30091 IX86_BUILTIN_PCMPGTW512_MASK,
30092 IX86_BUILTIN_PTESTMB512,
30093 IX86_BUILTIN_PTESTMW512,
30094 IX86_BUILTIN_PTESTNMB512,
30095 IX86_BUILTIN_PTESTNMW512,
30096 IX86_BUILTIN_PSLLVV32HI,
30097 IX86_BUILTIN_PABSB512,
30098 IX86_BUILTIN_PABSW512,
30099 IX86_BUILTIN_BLENDMW512,
30100 IX86_BUILTIN_BLENDMB512,
30101 IX86_BUILTIN_CMPB512,
30102 IX86_BUILTIN_CMPW512,
30103 IX86_BUILTIN_UCMPB512,
30104 IX86_BUILTIN_UCMPW512,
30106 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
30107 where all operands are 32-byte or 64-byte wide respectively
(see the note after this group). */
30108 IX86_BUILTIN_GATHERALTSIV4DF,
30109 IX86_BUILTIN_GATHERALTDIV8SF,
30110 IX86_BUILTIN_GATHERALTSIV4DI,
30111 IX86_BUILTIN_GATHERALTDIV8SI,
30112 IX86_BUILTIN_GATHER3ALTDIV16SF,
30113 IX86_BUILTIN_GATHER3ALTDIV16SI,
30114 IX86_BUILTIN_GATHER3ALTSIV4DF,
30115 IX86_BUILTIN_GATHER3ALTDIV8SF,
30116 IX86_BUILTIN_GATHER3ALTSIV4DI,
30117 IX86_BUILTIN_GATHER3ALTDIV8SI,
30118 IX86_BUILTIN_GATHER3ALTSIV8DF,
30119 IX86_BUILTIN_GATHER3ALTSIV8DI,
30120 IX86_BUILTIN_GATHER3DIV16SF,
30121 IX86_BUILTIN_GATHER3DIV16SI,
30122 IX86_BUILTIN_GATHER3DIV8DF,
30123 IX86_BUILTIN_GATHER3DIV8DI,
30124 IX86_BUILTIN_GATHER3SIV16SF,
30125 IX86_BUILTIN_GATHER3SIV16SI,
30126 IX86_BUILTIN_GATHER3SIV8DF,
30127 IX86_BUILTIN_GATHER3SIV8DI,
30128 IX86_BUILTIN_SCATTERDIV16SF,
30129 IX86_BUILTIN_SCATTERDIV16SI,
30130 IX86_BUILTIN_SCATTERDIV8DF,
30131 IX86_BUILTIN_SCATTERDIV8DI,
30132 IX86_BUILTIN_SCATTERSIV16SF,
30133 IX86_BUILTIN_SCATTERSIV16SI,
30134 IX86_BUILTIN_SCATTERSIV8DF,
30135 IX86_BUILTIN_SCATTERSIV8DI,
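/* Editorial note, a sketch rather than a statement of the implementation:
   the *ALT* gather variants above are expected to accept an index vector
   whose element count differs from the data vector (e.g. a V8SImode index
   paired with V4DFmode data for IX86_BUILTIN_GATHERALTSIV4DF), so the
   vectorizer can keep both operands at the full 32-byte or 64-byte width;
   the builtin expander then presumably uses only the matching half of the
   wider operand (or widens the half-width result back) before emitting the
   underlying gather/scatter instruction.  */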
30137 /* AVX512PF */
30138 IX86_BUILTIN_GATHERPFQPD,
30139 IX86_BUILTIN_GATHERPFDPS,
30140 IX86_BUILTIN_GATHERPFDPD,
30141 IX86_BUILTIN_GATHERPFQPS,
30142 IX86_BUILTIN_SCATTERPFDPD,
30143 IX86_BUILTIN_SCATTERPFDPS,
30144 IX86_BUILTIN_SCATTERPFQPD,
30145 IX86_BUILTIN_SCATTERPFQPS,
30147 /* AVX-512ER */
30148 IX86_BUILTIN_EXP2PD_MASK,
30149 IX86_BUILTIN_EXP2PS_MASK,
30150 IX86_BUILTIN_EXP2PS,
30151 IX86_BUILTIN_RCP28PD,
30152 IX86_BUILTIN_RCP28PS,
30153 IX86_BUILTIN_RCP28SD,
30154 IX86_BUILTIN_RCP28SS,
30155 IX86_BUILTIN_RSQRT28PD,
30156 IX86_BUILTIN_RSQRT28PS,
30157 IX86_BUILTIN_RSQRT28SD,
30158 IX86_BUILTIN_RSQRT28SS,
30160 /* AVX-512IFMA */
30161 IX86_BUILTIN_VPMADD52LUQ512,
30162 IX86_BUILTIN_VPMADD52HUQ512,
30163 IX86_BUILTIN_VPMADD52LUQ256,
30164 IX86_BUILTIN_VPMADD52HUQ256,
30165 IX86_BUILTIN_VPMADD52LUQ128,
30166 IX86_BUILTIN_VPMADD52HUQ128,
30167 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30168 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30169 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30170 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30171 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30172 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30174 /* AVX-512VBMI */
30175 IX86_BUILTIN_VPMULTISHIFTQB512,
30176 IX86_BUILTIN_VPMULTISHIFTQB256,
30177 IX86_BUILTIN_VPMULTISHIFTQB128,
30178 IX86_BUILTIN_VPERMVARQI512_MASK,
30179 IX86_BUILTIN_VPERMT2VARQI512,
30180 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30181 IX86_BUILTIN_VPERMI2VARQI512,
30182 IX86_BUILTIN_VPERMVARQI256_MASK,
30183 IX86_BUILTIN_VPERMVARQI128_MASK,
30184 IX86_BUILTIN_VPERMT2VARQI256,
30185 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30186 IX86_BUILTIN_VPERMT2VARQI128,
30187 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30188 IX86_BUILTIN_VPERMI2VARQI256,
30189 IX86_BUILTIN_VPERMI2VARQI128,
30191 /* SHA builtins. */
30192 IX86_BUILTIN_SHA1MSG1,
30193 IX86_BUILTIN_SHA1MSG2,
30194 IX86_BUILTIN_SHA1NEXTE,
30195 IX86_BUILTIN_SHA1RNDS4,
30196 IX86_BUILTIN_SHA256MSG1,
30197 IX86_BUILTIN_SHA256MSG2,
30198 IX86_BUILTIN_SHA256RNDS2,
30200 /* CLWB instructions. */
30201 IX86_BUILTIN_CLWB,
30203 /* PCOMMIT instructions. */
30204 IX86_BUILTIN_PCOMMIT,
30206 /* CLFLUSHOPT instructions. */
30207 IX86_BUILTIN_CLFLUSHOPT,
30209 /* TFmode support builtins. */
30210 IX86_BUILTIN_INFQ,
30211 IX86_BUILTIN_HUGE_VALQ,
30212 IX86_BUILTIN_FABSQ,
30213 IX86_BUILTIN_COPYSIGNQ,
30215 /* Vectorizer support builtins. */
30216 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30217 IX86_BUILTIN_CPYSGNPS,
30218 IX86_BUILTIN_CPYSGNPD,
30219 IX86_BUILTIN_CPYSGNPS256,
30220 IX86_BUILTIN_CPYSGNPS512,
30221 IX86_BUILTIN_CPYSGNPD256,
30222 IX86_BUILTIN_CPYSGNPD512,
30223 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30224 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30227 /* FMA4 instructions. */
30228 IX86_BUILTIN_VFMADDSS,
30229 IX86_BUILTIN_VFMADDSD,
30230 IX86_BUILTIN_VFMADDPS,
30231 IX86_BUILTIN_VFMADDPD,
30232 IX86_BUILTIN_VFMADDPS256,
30233 IX86_BUILTIN_VFMADDPD256,
30234 IX86_BUILTIN_VFMADDSUBPS,
30235 IX86_BUILTIN_VFMADDSUBPD,
30236 IX86_BUILTIN_VFMADDSUBPS256,
30237 IX86_BUILTIN_VFMADDSUBPD256,
30239 /* FMA3 instructions. */
30240 IX86_BUILTIN_VFMADDSS3,
30241 IX86_BUILTIN_VFMADDSD3,
30243 /* XOP instructions. */
30244 IX86_BUILTIN_VPCMOV,
30245 IX86_BUILTIN_VPCMOV_V2DI,
30246 IX86_BUILTIN_VPCMOV_V4SI,
30247 IX86_BUILTIN_VPCMOV_V8HI,
30248 IX86_BUILTIN_VPCMOV_V16QI,
30249 IX86_BUILTIN_VPCMOV_V4SF,
30250 IX86_BUILTIN_VPCMOV_V2DF,
30251 IX86_BUILTIN_VPCMOV256,
30252 IX86_BUILTIN_VPCMOV_V4DI256,
30253 IX86_BUILTIN_VPCMOV_V8SI256,
30254 IX86_BUILTIN_VPCMOV_V16HI256,
30255 IX86_BUILTIN_VPCMOV_V32QI256,
30256 IX86_BUILTIN_VPCMOV_V8SF256,
30257 IX86_BUILTIN_VPCMOV_V4DF256,
30259 IX86_BUILTIN_VPPERM,
30261 IX86_BUILTIN_VPMACSSWW,
30262 IX86_BUILTIN_VPMACSWW,
30263 IX86_BUILTIN_VPMACSSWD,
30264 IX86_BUILTIN_VPMACSWD,
30265 IX86_BUILTIN_VPMACSSDD,
30266 IX86_BUILTIN_VPMACSDD,
30267 IX86_BUILTIN_VPMACSSDQL,
30268 IX86_BUILTIN_VPMACSSDQH,
30269 IX86_BUILTIN_VPMACSDQL,
30270 IX86_BUILTIN_VPMACSDQH,
30271 IX86_BUILTIN_VPMADCSSWD,
30272 IX86_BUILTIN_VPMADCSWD,
30274 IX86_BUILTIN_VPHADDBW,
30275 IX86_BUILTIN_VPHADDBD,
30276 IX86_BUILTIN_VPHADDBQ,
30277 IX86_BUILTIN_VPHADDWD,
30278 IX86_BUILTIN_VPHADDWQ,
30279 IX86_BUILTIN_VPHADDDQ,
30280 IX86_BUILTIN_VPHADDUBW,
30281 IX86_BUILTIN_VPHADDUBD,
30282 IX86_BUILTIN_VPHADDUBQ,
30283 IX86_BUILTIN_VPHADDUWD,
30284 IX86_BUILTIN_VPHADDUWQ,
30285 IX86_BUILTIN_VPHADDUDQ,
30286 IX86_BUILTIN_VPHSUBBW,
30287 IX86_BUILTIN_VPHSUBWD,
30288 IX86_BUILTIN_VPHSUBDQ,
30290 IX86_BUILTIN_VPROTB,
30291 IX86_BUILTIN_VPROTW,
30292 IX86_BUILTIN_VPROTD,
30293 IX86_BUILTIN_VPROTQ,
30294 IX86_BUILTIN_VPROTB_IMM,
30295 IX86_BUILTIN_VPROTW_IMM,
30296 IX86_BUILTIN_VPROTD_IMM,
30297 IX86_BUILTIN_VPROTQ_IMM,
30299 IX86_BUILTIN_VPSHLB,
30300 IX86_BUILTIN_VPSHLW,
30301 IX86_BUILTIN_VPSHLD,
30302 IX86_BUILTIN_VPSHLQ,
30303 IX86_BUILTIN_VPSHAB,
30304 IX86_BUILTIN_VPSHAW,
30305 IX86_BUILTIN_VPSHAD,
30306 IX86_BUILTIN_VPSHAQ,
30308 IX86_BUILTIN_VFRCZSS,
30309 IX86_BUILTIN_VFRCZSD,
30310 IX86_BUILTIN_VFRCZPS,
30311 IX86_BUILTIN_VFRCZPD,
30312 IX86_BUILTIN_VFRCZPS256,
30313 IX86_BUILTIN_VFRCZPD256,
30315 IX86_BUILTIN_VPCOMEQUB,
30316 IX86_BUILTIN_VPCOMNEUB,
30317 IX86_BUILTIN_VPCOMLTUB,
30318 IX86_BUILTIN_VPCOMLEUB,
30319 IX86_BUILTIN_VPCOMGTUB,
30320 IX86_BUILTIN_VPCOMGEUB,
30321 IX86_BUILTIN_VPCOMFALSEUB,
30322 IX86_BUILTIN_VPCOMTRUEUB,
30324 IX86_BUILTIN_VPCOMEQUW,
30325 IX86_BUILTIN_VPCOMNEUW,
30326 IX86_BUILTIN_VPCOMLTUW,
30327 IX86_BUILTIN_VPCOMLEUW,
30328 IX86_BUILTIN_VPCOMGTUW,
30329 IX86_BUILTIN_VPCOMGEUW,
30330 IX86_BUILTIN_VPCOMFALSEUW,
30331 IX86_BUILTIN_VPCOMTRUEUW,
30333 IX86_BUILTIN_VPCOMEQUD,
30334 IX86_BUILTIN_VPCOMNEUD,
30335 IX86_BUILTIN_VPCOMLTUD,
30336 IX86_BUILTIN_VPCOMLEUD,
30337 IX86_BUILTIN_VPCOMGTUD,
30338 IX86_BUILTIN_VPCOMGEUD,
30339 IX86_BUILTIN_VPCOMFALSEUD,
30340 IX86_BUILTIN_VPCOMTRUEUD,
30342 IX86_BUILTIN_VPCOMEQUQ,
30343 IX86_BUILTIN_VPCOMNEUQ,
30344 IX86_BUILTIN_VPCOMLTUQ,
30345 IX86_BUILTIN_VPCOMLEUQ,
30346 IX86_BUILTIN_VPCOMGTUQ,
30347 IX86_BUILTIN_VPCOMGEUQ,
30348 IX86_BUILTIN_VPCOMFALSEUQ,
30349 IX86_BUILTIN_VPCOMTRUEUQ,
30351 IX86_BUILTIN_VPCOMEQB,
30352 IX86_BUILTIN_VPCOMNEB,
30353 IX86_BUILTIN_VPCOMLTB,
30354 IX86_BUILTIN_VPCOMLEB,
30355 IX86_BUILTIN_VPCOMGTB,
30356 IX86_BUILTIN_VPCOMGEB,
30357 IX86_BUILTIN_VPCOMFALSEB,
30358 IX86_BUILTIN_VPCOMTRUEB,
30360 IX86_BUILTIN_VPCOMEQW,
30361 IX86_BUILTIN_VPCOMNEW,
30362 IX86_BUILTIN_VPCOMLTW,
30363 IX86_BUILTIN_VPCOMLEW,
30364 IX86_BUILTIN_VPCOMGTW,
30365 IX86_BUILTIN_VPCOMGEW,
30366 IX86_BUILTIN_VPCOMFALSEW,
30367 IX86_BUILTIN_VPCOMTRUEW,
30369 IX86_BUILTIN_VPCOMEQD,
30370 IX86_BUILTIN_VPCOMNED,
30371 IX86_BUILTIN_VPCOMLTD,
30372 IX86_BUILTIN_VPCOMLED,
30373 IX86_BUILTIN_VPCOMGTD,
30374 IX86_BUILTIN_VPCOMGED,
30375 IX86_BUILTIN_VPCOMFALSED,
30376 IX86_BUILTIN_VPCOMTRUED,
30378 IX86_BUILTIN_VPCOMEQQ,
30379 IX86_BUILTIN_VPCOMNEQ,
30380 IX86_BUILTIN_VPCOMLTQ,
30381 IX86_BUILTIN_VPCOMLEQ,
30382 IX86_BUILTIN_VPCOMGTQ,
30383 IX86_BUILTIN_VPCOMGEQ,
30384 IX86_BUILTIN_VPCOMFALSEQ,
30385 IX86_BUILTIN_VPCOMTRUEQ,
30387 /* LWP instructions. */
30388 IX86_BUILTIN_LLWPCB,
30389 IX86_BUILTIN_SLWPCB,
30390 IX86_BUILTIN_LWPVAL32,
30391 IX86_BUILTIN_LWPVAL64,
30392 IX86_BUILTIN_LWPINS32,
30393 IX86_BUILTIN_LWPINS64,
30395 IX86_BUILTIN_CLZS,
30397 /* RTM */
30398 IX86_BUILTIN_XBEGIN,
30399 IX86_BUILTIN_XEND,
30400 IX86_BUILTIN_XABORT,
30401 IX86_BUILTIN_XTEST,
30403 /* MPX */
30404 IX86_BUILTIN_BNDMK,
30405 IX86_BUILTIN_BNDSTX,
30406 IX86_BUILTIN_BNDLDX,
30407 IX86_BUILTIN_BNDCL,
30408 IX86_BUILTIN_BNDCU,
30409 IX86_BUILTIN_BNDRET,
30410 IX86_BUILTIN_BNDNARROW,
30411 IX86_BUILTIN_BNDINT,
30412 IX86_BUILTIN_SIZEOF,
30413 IX86_BUILTIN_BNDLOWER,
30414 IX86_BUILTIN_BNDUPPER,
30416 /* BMI instructions. */
30417 IX86_BUILTIN_BEXTR32,
30418 IX86_BUILTIN_BEXTR64,
30419 IX86_BUILTIN_CTZS,
30421 /* TBM instructions. */
30422 IX86_BUILTIN_BEXTRI32,
30423 IX86_BUILTIN_BEXTRI64,
30425 /* BMI2 instructions. */
30426 IX86_BUILTIN_BZHI32,
30427 IX86_BUILTIN_BZHI64,
30428 IX86_BUILTIN_PDEP32,
30429 IX86_BUILTIN_PDEP64,
30430 IX86_BUILTIN_PEXT32,
30431 IX86_BUILTIN_PEXT64,
30433 /* ADX instructions. */
30434 IX86_BUILTIN_ADDCARRYX32,
30435 IX86_BUILTIN_ADDCARRYX64,
30437 /* SBB instructions. */
30438 IX86_BUILTIN_SBB32,
30439 IX86_BUILTIN_SBB64,
30441 /* FSGSBASE instructions. */
30442 IX86_BUILTIN_RDFSBASE32,
30443 IX86_BUILTIN_RDFSBASE64,
30444 IX86_BUILTIN_RDGSBASE32,
30445 IX86_BUILTIN_RDGSBASE64,
30446 IX86_BUILTIN_WRFSBASE32,
30447 IX86_BUILTIN_WRFSBASE64,
30448 IX86_BUILTIN_WRGSBASE32,
30449 IX86_BUILTIN_WRGSBASE64,
30451 /* RDRND instructions. */
30452 IX86_BUILTIN_RDRAND16_STEP,
30453 IX86_BUILTIN_RDRAND32_STEP,
30454 IX86_BUILTIN_RDRAND64_STEP,
30456 /* RDSEED instructions. */
30457 IX86_BUILTIN_RDSEED16_STEP,
30458 IX86_BUILTIN_RDSEED32_STEP,
30459 IX86_BUILTIN_RDSEED64_STEP,
30461 /* F16C instructions. */
30462 IX86_BUILTIN_CVTPH2PS,
30463 IX86_BUILTIN_CVTPH2PS256,
30464 IX86_BUILTIN_CVTPS2PH,
30465 IX86_BUILTIN_CVTPS2PH256,
30467 /* CFString built-in for darwin */
30468 IX86_BUILTIN_CFSTRING,
30470 /* Builtins to get CPU type and supported features. */
30471 IX86_BUILTIN_CPU_INIT,
30472 IX86_BUILTIN_CPU_IS,
30473 IX86_BUILTIN_CPU_SUPPORTS,
30475 /* Read/write FLAGS register built-ins. */
30476 IX86_BUILTIN_READ_FLAGS,
30477 IX86_BUILTIN_WRITE_FLAGS,
30479 IX86_BUILTIN_MAX
30480 };
30482 /* Table for the ix86 builtin decls. */
30483 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30485 /* Table of all of the builtin functions that are possible with different ISA's
30486 but are waiting to be built until a function is declared to use that
30487 ISA. */
30488 struct builtin_isa {
30489   const char *name;                  /* function name */
30490   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30491   HOST_WIDE_INT isa;                 /* isa_flags this builtin is defined for */
30492   bool const_p;                      /* true if the declaration is constant */
30493   bool leaf_p;                       /* true if the declaration has leaf attribute */
30494   bool nothrow_p;                    /* true if the declaration has nothrow attribute */
30495   bool set_and_not_built_p;
30496 };
30498 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30500 /* Bits that can still enable any inclusion of a builtin. */
30501 static HOST_WIDE_INT deferred_isa_values = 0;
30503 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30504 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30505 function decl in the ix86_builtins array. Returns the function decl or
30506 NULL_TREE, if the builtin was not added.
30508 If the front end has a special hook for builtin functions, delay adding
30509 builtin functions that aren't in the current ISA until the ISA is changed
30510 with function specific optimization. Doing so can save about 300K for the
30511 default compiler. When the builtin is expanded, check at that time whether
30512 it is valid.
30514 If the front end doesn't have a special hook, record all builtins, even
30515 those whose instruction set isn't in the current ISA, in case the user uses
30516 function specific options for a different ISA, so that we don't get scope
30517 errors if a builtin is added in the middle of a function scope. */
30519 static inline tree
30520 def_builtin (HOST_WIDE_INT mask, const char *name,
30521              enum ix86_builtin_func_type tcode,
30522              enum ix86_builtins code)
30523 {
30524   tree decl = NULL_TREE;
30526   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30527     {
30528       ix86_builtins_isa[(int) code].isa = mask;
30530       mask &= ~OPTION_MASK_ISA_64BIT;
30531       if (mask == 0
30532           || (mask & ix86_isa_flags) != 0
30533           || (lang_hooks.builtin_function
30534               == lang_hooks.builtin_function_ext_scope))
30536         {
30537           tree type = ix86_get_builtin_func_type (tcode);
30538           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30539                                        NULL, NULL_TREE);
30540           ix86_builtins[(int) code] = decl;
30541           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30542         }
30543       else
30544         {
30545           /* Just a MASK where set_and_not_built_p == true can potentially
30546              include a builtin. */
30547           deferred_isa_values |= mask;
30548           ix86_builtins[(int) code] = NULL_TREE;
30549           ix86_builtins_isa[(int) code].tcode = tcode;
30550           ix86_builtins_isa[(int) code].name = name;
30551           ix86_builtins_isa[(int) code].leaf_p = false;
30552           ix86_builtins_isa[(int) code].nothrow_p = false;
30553           ix86_builtins_isa[(int) code].const_p = false;
30554           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30555         }
30556     }
30558   return decl;
30559 }
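/* Illustrative sketch (editorial addition, not part of the original file):
   how a builtin might be registered through def_builtin.  The builtin name
   and the IX86_BUILTIN_EXAMPLE enumerator below are hypothetical
   placeholders; the real registrations are the def_builtin /
   def_builtin_const calls made during target builtin initialization in this
   file.  */
#if 0
static void
example_register_builtin (void)
{
  /* With OPTION_MASK_ISA_SSE2 and a VOID_FTYPE_VOID type code, the decl is
     created immediately when SSE2 is already enabled (or the front end has
     the extended-scope hook); otherwise only the name/type are recorded in
     ix86_builtins_isa and the mask is added to deferred_isa_values.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
               VOID_FTYPE_VOID, IX86_BUILTIN_EXAMPLE);
}
#endif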
30561 /* Like def_builtin, but also marks the function decl "const". */
30563 static inline tree
30564 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30565                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30566 {
30567   tree decl = def_builtin (mask, name, tcode, code);
30568   if (decl)
30569     TREE_READONLY (decl) = 1;
30570   else
30571     ix86_builtins_isa[(int) code].const_p = true;
30573   return decl;
30574 }
30576 /* Add any new builtin functions for a given ISA that may not have been
30577 declared. This saves a bit of space compared to adding all of the
30578 declarations to the tree, even if we didn't use them. */
30580 static void
30581 ix86_add_new_builtins (HOST_WIDE_INT isa)
30582 {
30583   if ((isa & deferred_isa_values) == 0)
30584     return;
30586   /* Bits in ISA value can be removed from potential isa values. */
30587   deferred_isa_values &= ~isa;
30589   int i;
30590   tree saved_current_target_pragma = current_target_pragma;
30591   current_target_pragma = NULL_TREE;
30593   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30594     {
30595       if ((ix86_builtins_isa[i].isa & isa) != 0
30596           && ix86_builtins_isa[i].set_and_not_built_p)
30597         {
30598           tree decl, type;
30600           /* Don't define the builtin again. */
30601           ix86_builtins_isa[i].set_and_not_built_p = false;
30603           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30604           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30605                                                  type, i, BUILT_IN_MD, NULL,
30606                                                  NULL_TREE);
30608           ix86_builtins[i] = decl;
30609           if (ix86_builtins_isa[i].const_p)
30610             TREE_READONLY (decl) = 1;
30611           if (ix86_builtins_isa[i].leaf_p)
30612             DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30613                                                       NULL_TREE);
30614           if (ix86_builtins_isa[i].nothrow_p)
30615             TREE_NOTHROW (decl) = 1;
30616         }
30617     }
30619   current_target_pragma = saved_current_target_pragma;
30620 }
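/* Illustrative sketch (editorial addition): the deferral round trip,
   assuming AVX2 was not enabled on the command line.  The builtin name, the
   IX86_BUILTIN_EXAMPLE_AVX2 enumerator and the type code are hypothetical
   placeholders for the real call sites elsewhere in this file.  */
#if 0
  /* At initialization, def_builtin only records the builtin and adds
     OPTION_MASK_ISA_AVX2 to deferred_isa_values.  */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example_avx2",
               V8SI_FTYPE_V8SI_V8SI, IX86_BUILTIN_EXAMPLE_AVX2);

  /* Later, when function-specific options (e.g. attribute ((target ("avx2"))))
     turn the ISA flag on, the deferred declaration is materialized.  */
  ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);
#endif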
30622 /* Bits for builtin_description.flag. */
30624 /* Set when we don't support the comparison natively, and should
30625 swap_comparison in order to support it. */
30626 #define BUILTIN_DESC_SWAP_OPERANDS 1
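/* Editorial note: for example, a comparison that has no native comi-style
   pattern could be listed with its operands-swapped counterpart and this
   flag set, so that the expander swaps the two inputs before emitting the
   instruction.  */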
30628 struct builtin_description
30629 {
30630   const HOST_WIDE_INT mask;
30631   const enum insn_code icode;
30632   const char *const name;
30633   const enum ix86_builtins code;
30634   const enum rtx_code comparison;
30635   const int flag;
30636 };
30638 static const struct builtin_description bdesc_comi[] =
30639 {
30640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30646 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30664 };
30666 static const struct builtin_description bdesc_pcmpestr[] =
30667 {
30668 /* SSE4.2 */
30669 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30670 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30671 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30672 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30673 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30674 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30675 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30676 };
30678 static const struct builtin_description bdesc_pcmpistr[] =
30679 {
30680 /* SSE4.2 */
30681 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30682 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30683 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30684 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30685 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30686 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30687 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30688 };
30690 /* Special builtins with variable number of arguments. */
30691 static const struct builtin_description bdesc_special_args[] =
30692 {
30693 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30694 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30695 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30697 /* 80387 (for use internally for atomic compound assignment). */
30698 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30699 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30700 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30701 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30703 /* MMX */
30704 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30706 /* 3DNow! */
30707 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30709 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30710 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30711 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30712 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30713 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30714 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30715 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30716 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30717 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30719 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30720 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30721 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30722 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30723 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30724 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30725 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30726 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30728 /* SSE */
30729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30736 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30738 /* SSE or 3DNow!A */
30739 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30740 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30742 /* SSE2 */
30743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30750 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30757 /* SSE3 */
30758 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30760 /* SSE4.1 */
30761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30763 /* SSE4A */
30764 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30765 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30767 /* AVX */
30768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30771 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30772 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30773 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30798 /* AVX2 */
30799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30809 /* AVX512F */
30810 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30811 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30812 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30813 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30814 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30815 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30858 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30859 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30860 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30861 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30862 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30863 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30865 /* FSGSBASE */
30866 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30867 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30868 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30869 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30870 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30871 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30872 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30873 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30875 /* RTM */
30876 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30877 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30878 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30880 /* AVX512BW */
30881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30883 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30884 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30886 /* AVX512VL */
30887 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30888 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30889 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30890 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30925 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30926 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30927 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30928 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30962 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30963 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30982 /* PCOMMIT. */
30983 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
30984 };
30986 /* Builtins with variable number of arguments. */
30987 static const struct builtin_description bdesc_args[] =
30988 {
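/* Added note (not part of the upstream file): each entry below is one
   struct builtin_description, roughly
     { ISA option mask, insn pattern (CODE_FOR_*), builtin name,
       IX86_BUILTIN_* code, rtx comparison or sub-code (UNKNOWN when unused),
       (int) ix86_builtin_func_type describing the C prototype }.
   For example, the V4SF_FTYPE_V4SF_V4SF entry for "__builtin_ia32_addps"
   registers roughly
       __v4sf __builtin_ia32_addps (__v4sf, __v4sf);
   which the SSE intrinsic wrappers in <xmmintrin.h> can expand to.  */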
30989 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30990 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30991 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30992 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30993 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30994 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30995 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
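/* Added note: a mask of ~OPTION_MASK_ISA_64BIT, as in the entries above,
   appears to mean "available in both 32-bit and 64-bit modes", whereas a
   plain OPTION_MASK_ISA_64BIT restricts the builtin to TARGET_64BIT;
   def_builtin applies the mask when these tables are registered.  */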
30997 /* MMX */
30998 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31000 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31001 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31002 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31003 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31005 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31006 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31007 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31008 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31009 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31010 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31011 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31012 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31014 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31015 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31017 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31018 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31019 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31020 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31022 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31023 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31024 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31025 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31026 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31027 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31029 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31030 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31031 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31032 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31033 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31034 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31036 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31037 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31038 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31040 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31042 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31043 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31044 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31045 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31046 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31047 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31049 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31050 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31051 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31052 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31054 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31056 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31058 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31059 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
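/* Added note: the ..._SI_COUNT and ..._V*_COUNT prototypes mark the last
   operand as a shift count, taken either as a scalar integer or from a
   vector register, matching the immediate and register forms of the
   MMX/SSE shift instructions.  */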
31061 /* 3DNow! */
31062 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31063 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31064 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31065 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31067 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31068 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31069 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31070 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31071 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31072 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31073 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31074 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31075 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31076 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31077 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31078 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31079 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31080 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31081 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31083 /* 3DNow!A */
31084 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31085 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31086 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31087 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31088 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31089 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31091 /* SSE */
31092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31094 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31096 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31100 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31107 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31108 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31109 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31135 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
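/* Added note: for the cmp* builtins the fifth field carries the rtx
   comparison code fed to the maskcmp pattern; the ..._SWAP prototypes swap
   the two operands first, which is how cmpgtps/cmpgeps are expressed as
   LT/LE in the entries above.  */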
31137 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31138 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31139 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31140 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31142 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31144 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31145 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31147 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31152 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31153 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31155 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31156 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31157 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31159 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31161 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31162 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31163 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
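/* Added note: the ..._VEC_MERGE prototypes take a single vector argument;
   the operation is applied to element 0 and the remaining elements are
   passed through unchanged, matching the sqrtss/rsqrtss/rcpss semantics.  */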
31165 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31166 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
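/* Added note: entries with a null name (0) are not given a __builtin_ia32_*
   name here; IX86_BUILTIN_FABSQ and IX86_BUILTIN_COPYSIGNQ appear to be
   registered separately (as __builtin_fabsq and __builtin_copysignq) in
   ix86_init_builtins.  */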
31168 /* SSE MMX or 3DNow!A */
31169 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31170 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31171 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31173 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31175 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31176 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31178 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31179 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31181 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31183 /* SSE2 */
31184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31190 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31193 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31202 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31203 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31207 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31209 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31210 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31211 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31212 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31213 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31215 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31240 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31244 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31246 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31247 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31249 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31252 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31253 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31255 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31257 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31258 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31259 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31260 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31261 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31262 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31263 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31264 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31266 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31267 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31269 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31275 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31281 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31283 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31301 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31320 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31353 /* SSE2 MMX */
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31357 /* SSE3 */
31358 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31359 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31361 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31362 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31364 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31365 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31366 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31368 /* SSSE3 */
31369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31373 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31374 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31376 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31378 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31379 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31380 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31382 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31383 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31384 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31385 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31386 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31387 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31388 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31389 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31390 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31391 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31392 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31393 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31394 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31395 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31396 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31397 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31398 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31399 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31401 /* SSSE3. */
31402 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31403 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
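/* Added note: the ..._INT_CONVERT prototypes appear to indicate that the
   vector operands are re-interpreted in the mode of the insn pattern (the
   TImode/DImode palignr and pslldq/psrldq patterns above); the expander
   converts between the two representations.  */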
31405 /* SSE4.1 */
31406 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31407 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31408 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31409 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31410 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31411 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31412 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31413 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31414 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31415 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31417 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31418 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31419 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31420 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31421 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31422 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31423 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31424 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31425 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31426 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31427 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31428 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31429 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31431 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31433 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31434 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31435 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31436 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31437 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31438 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31439 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31440 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31441 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31442 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31444 /* SSE4.1 */
31445 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31446 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31447 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31448 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31450 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31451 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31452 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31453 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31455 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31456 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31458 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31459 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31461 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31462 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31463 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31464 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31466 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31467 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31469 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31470 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31472 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31473 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31474 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
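  /* A note on the two reused fields above (a sketch of the mechanism, not
     part of the upstream table): the floorpd/ceilpd/truncpd/rintpd (and the
     corresponding ps) entries all share CODE_FOR_sse4_1_roundpd or
     CODE_FOR_sse4_1_roundps; the comparison slot carries a ROUND_FLOOR,
     ROUND_CEIL, ROUND_TRUNC or ROUND_MXCSR constant that
     ix86_expand_sse_round turns into the rounding-mode immediate, so
     __builtin_ia32_floorpd (x) is in effect roundpd with a fixed
     round-toward-negative-infinity immediate.  For the ptest entries the
     comparison slot (EQ, LTU, GTU) instead selects which PTEST flag result
     is returned: ZF for testz, CF for testc, and "neither set" for
     testnzc.  */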
31476 /* SSE4.2 */
31477 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31478 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31479 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31480 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31481 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
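  /* Each entry wires an ISA mask and an insn pattern to a builtin, and the
     last field selects the C prototype, so UINT_FTYPE_UINT_UCHAR above
     yields unsigned int (unsigned int, unsigned char).  A minimal sketch of
     user-level use (function name illustrative, assuming -msse4.2):

       unsigned int
       crc32_step (unsigned int crc, unsigned char b)
       {
         return __builtin_ia32_crc32qi (crc, b);
       }

     which is the builtin that the _mm_crc32_u8 intrinsic in <smmintrin.h>
     expands to.  */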
31483 /* SSE4A */
31484 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31485 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31486 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31487 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31489 /* AES */
31490 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31495 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31498 /* PCLMUL */
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31501 /* AVX */
31502 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31503 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31504 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31505 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31506 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31507 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31508 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31509 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31510 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31511 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31512 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31513 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31514 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31515 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31516 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31517 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31518 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31519 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31520 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31521 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31522 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31523 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31524 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31525 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31526 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31527 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31529 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31530 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31531 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31532 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31534 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31535 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31536 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31537 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31538 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31539 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31540 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31543 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31544 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31550 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31551 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31555 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31557 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31558 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31562 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31571 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31573 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31574 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31575 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31577 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31579 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31581 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31584 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
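  /* The V4DF_FTYPE_V4DF_V4DF style flags above pick the prototype the
     builtin is registered with; __builtin_ia32_addpd256, for instance, gets
     v4df (v4df, v4df), which is what the _mm256_add_pd wrapper in
     <avxintrin.h> casts its __m256d arguments to.  A minimal sketch
     (illustrative only, assuming -mavx and <immintrin.h>):

       __m256d
       add4 (__m256d a, __m256d b)
       {
         return (__m256d) __builtin_ia32_addpd256 ((__v4df) a, (__v4df) b);
       }  */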
31642 /* AVX2 */
31643 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31644 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31645 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31646 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31647 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31648 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31649 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31650 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31651 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31652 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31653 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31654 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31655 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31656 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31657 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31658 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31659 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31660 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31661 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31662 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31663 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31664 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31665 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31666 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31667 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31668 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31669 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31670 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31671 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31672 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31673 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31674 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31675 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31676 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31677 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31678 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31679 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31680 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31681 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31682 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31683 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31684 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31685 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31686 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31687 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31688 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31689 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31690 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31691 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31692 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31693 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31694 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31695 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31696 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31697 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31698 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31699 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31700 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31701 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31702 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31703 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31704 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31705 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31706 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31707 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31708 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31709 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31710 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31711 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31712 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31713 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31714 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31715 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31716 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31717 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31718 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31719 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31720 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31721 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31722 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31723 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31724 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31725 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31726 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
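  /* Roughly, the prototype suffixes in the AVX2 shift entries above encode
     how the expander massages operands: _COUNT marks the last operand as a
     shift count that may be an immediate or a vector register (psllwi256
     and psllw256 share CODE_FOR_ashlv16hi3 but differ in the count type),
     while _CONVERT marks entries such as pslldqi256, psrldqi256 and
     palignr256 whose insn pattern operates on V2TImode although the
     prototype uses V4DI, so the expander inserts lowpart conversions
     around the call.  */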
31790 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31792 /* BMI */
31793 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31794 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31795 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31797 /* TBM */
31798 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31799 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31801 /* F16C */
31802 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31803 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31804 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31805 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31807 /* BMI2 */
31808 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31809 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31810 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31811 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31812 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31813 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
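  /* The BMI2 pdep/pext entries follow the same scheme for scalar builtins;
     a minimal sketch (function name illustrative, assuming -mbmi2), using
     the unsigned int (unsigned int, unsigned int) prototype selected by
     UINT_FTYPE_UINT_UINT:

       unsigned int
       scatter_bits (unsigned int src, unsigned int mask)
       {
         return __builtin_ia32_pdep_si (src, mask);
       }

     which is what the _pdep_u32 intrinsic in <bmi2intrin.h> expands to.  */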
31815 /* AVX512F */
31816 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31817 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31818 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31819 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31820 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31821 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31822 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31823 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31824 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31825 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31826 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31827 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31828 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31829 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31830 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31831 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31832 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31833 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31834 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31835 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31836 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31837 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31838 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31839 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31840 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31841 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31842 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31843 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31844 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31845 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31846 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31847 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31848 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31849 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31850 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31851 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31852 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31853 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31854 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31855 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31856 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31857 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31858 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31859 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31860 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31861 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31862 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31863 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31864 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31865 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31866 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31867 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31868 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31869 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31870 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31871 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31872 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31873 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31874 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31875 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31876 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31877 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31878 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31879 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31880 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31881 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31882 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31883 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31884 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31885 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31886 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31887 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31888 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31889 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31890 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31891 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31892 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31893 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31982 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31983 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31984 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32017 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
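  /* A rough sketch of how the rows in this table become builtins.  Each
     entry pairs an ISA mask and an insn pattern with a builtin name, an
     IX86_BUILTIN_* code and a prototype selector.  Assuming the
     def_builtin_const helper and the registration loop in
     ix86_init_mmx_sse_builtins found elsewhere in this file (the exact
     code may differ slightly), registration is approximately:

       for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
         {
           if (d->name == 0)
             continue;
           def_builtin_const (d->mask, d->name,
                              (enum ix86_builtin_func_type) d->flag, d->code);
         }

     Rows whose name field is null are skipped by this loop and are
     registered by hand.  */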
32022 /* Mask arithmetic operations */
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
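  /* The mask-register rows above reach users through avx512fintrin.h.
     A rough approximation of the wrapper that consumes the kandhi entry
     (approximate; see avx512fintrin.h for the authoritative definition):

       extern __inline __mmask16
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm512_kand (__mmask16 __A, __mmask16 __B)
       {
         return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A,
                                                   (__mmask16) __B);
       }  */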
32034 /* SHA */
32035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
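  /* The SHA rows carry a null name, so the generic loop sketched above
     skips them; the SHA builtin names (__builtin_ia32_sha1msg1 and
     friends) are registered separately and wrapped by shaintrin.h.  A
     rough approximation of the wrapper for the sha1msg1 entry (see
     shaintrin.h for the authoritative form):

       extern __inline __m128i
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm_sha1msg1_epu32 (__m128i __A, __m128i __B)
       {
         return (__m128i) __builtin_ia32_sha1msg1 ((__v4si) __A,
                                                   (__v4si) __B);
       }  */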
32043 /* AVX512VL. */
32044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32045 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32054 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32055 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32056 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32057 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32082 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32083 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32084 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32085 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32086 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32087 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32088 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32089 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32090 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32091 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32092 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32093 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32094 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32099 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32100 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32101 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32102 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32103 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32104 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32105 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32106 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32107 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32108 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32111 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32112 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32113 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32114 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32121 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32122 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32123 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32124 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32125 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32126 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32127 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32128 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32129 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32141 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32154 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32155 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32158 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32159 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32171 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32182 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32183 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32184 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32185 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32186 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32187 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32188 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32190 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32193 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32195 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32209 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32212 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32213 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32216 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32225 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32228 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32229 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32230 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32231 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32232 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32233 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32236 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32237 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32238 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32239 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32244 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32245 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32246 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32247 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32248 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32249 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32272 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32273 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32276 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32277 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32280 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32281 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32282 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32283 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32301 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32302 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32303 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32304 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32305 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32306 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32308 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32309 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32310 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32311 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32312 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32313 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32314 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32315 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32316 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32317 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32318 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32321 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32324 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32325 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32366 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32367 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32426 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32427 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32428 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32429 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32430 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32431 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32432 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32433 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32434 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32435 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32443 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32454 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32455 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32456 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32457 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32458 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32459 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32460 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32461 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32522 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32523 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32524 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32525 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32534 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32535 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32536 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32537 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32538 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32539 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32540 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32541 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32542 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32543 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32545 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32546 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32552 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32553 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32554 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32555 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32556 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32557 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32558 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32559 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32564 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32565 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32580 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32581 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32588 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32589 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32590 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32591 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32639 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32648 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32649 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32650 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32651 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32659 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32660 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32661 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32662 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32680 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32687 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32688 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32689 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32702 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32703 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32704 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32723 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32724 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32740 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
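/* Note (explanatory aside, not part of the generated tables): every entry in
   this table follows the builtin_description layout used throughout this
   file -- the OPTION_MASK_ISA_* bits the builtin requires, the CODE_FOR_*
   insn pattern that implements it, the __builtin_ia32_* name exported to the
   intrinsics headers, its IX86_BUILTIN_* enumerator, an rtx comparison code
   (UNKNOWN when unused), and an ix86_builtin_func_type describing the
   prototype; e.g. QI_FTYPE_V4SF_V4SF_INT_QI above reads roughly as
   "mask8 f (V4SF, V4SF, int, mask8)".  */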
32757 /* AVX512DQ. */
32758 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32759 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32760 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32761 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32762 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32763 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32764 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32765 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32766 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32767 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32768 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32769 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32770 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32771 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32772 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32773 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32774 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32775 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32776 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32777 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32778 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32779 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32780 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32781 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32782 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32783 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32784 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32785 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32786 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32787 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32788 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
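/* Explanatory aside (a rule of thumb suggested by the entries above, not
   generated content): the trailing QI/HI/SI/DI operand in these prototypes is
   the write-mask, sized to the lane count -- QI (__mmask8) for vectors of up
   to 8 elements, HI (__mmask16) for 16, SI (__mmask32) for 32 and DI
   (__mmask64) for 64.  Builtins suffixed _mask merge masked-off lanes from
   the pass-through vector operand, while the _maskz variants zero them.  */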
32790 /* AVX512BW. */
32791 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32792 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32793 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32794 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32795 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32796 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32797 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32798 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32799 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32800 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32801 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32802 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32803 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32804 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32805 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32806 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32807 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32808 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32809 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32810 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32811 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32812 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32813 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32814 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32815 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32816 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32817 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32818 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32819 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32820 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32821 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32822 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32823 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32824 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32825 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32826 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32827 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32828 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32829 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32830 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32831 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32832 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32833 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32834 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32835 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32836 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32837 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32838 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32839 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32840 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32841 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32842 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32843 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32844 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32845 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32846 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32847 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32848 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32849 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32850 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32851 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32852 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32853 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32854 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32855 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32856 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32857 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32858 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32859 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32860 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32861 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32862 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32863 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32864 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32865 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32866 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32867 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32868 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32869 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32870 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32871 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32872 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32873 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32874 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32883 /* AVX512IFMA */
32884 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32885 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32886 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32887 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32888 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32889 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32890 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32891 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32892 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32893 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32894 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32895 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32897 /* AVX512VBMI */
32898 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32899 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32900 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32901 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32902 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32903 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32904 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32905 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32906 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32907 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32908 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32909 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32910 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32911 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32912 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
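   /* Editorial note (an illustrative sketch, not part of the original
      table): each row of this descriptor table is a struct
      builtin_description tuple of { ISA mask, insn code, builtin name,
      builtin enum, comparison, prototype }.  Taking the AVX512BW entry
        { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask,
          "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN,
          (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
      the mask gates registration on -mavx512bw, the insn code names the
      pattern the builtin expands through, and the FTYPE enum encodes
      roughly
        v64qi __builtin_ia32_paddb512_mask (v64qi, v64qi, v64qi passthru,
                                            di writemask);
      the UNKNOWN slot is the rtx comparison code, used only by the
      compare-style builtins.  */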
32913 };
32915 /* Builtins with rounding support. */
32916 static const struct builtin_description bdesc_round_args[] =
32917 {
32918 /* AVX512F */
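   /* Editorial note (an illustrative sketch, assuming the usual
      <immintrin.h> wrappers): every entry in bdesc_round_args takes a
      trailing rounding-mode immediate, which is why the prototype enums
      here end in _INT.  The V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT entry for
      "__builtin_ia32_addpd512_mask" below, for example, corresponds to a
      call shaped like
        __m512d r = (__m512d) __builtin_ia32_addpd512_mask
                      ((__v8df) a, (__v8df) b, (__v8df) src, (__mmask8) k,
                       _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      which is how _mm512_mask_add_round_pd is typically implemented.  */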
32919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32938 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32940 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32947 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32949 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32999 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33001 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33003 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33005 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33007 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33009 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33011 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33013 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33039 /* AVX512ER */
33040 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33041 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33042 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33043 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33044 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33045 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33046 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33047 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33048 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33049 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33051 /* AVX512DQ. */
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33057 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33058 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33059 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33060 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33061 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33066 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33067 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33068 };
33070 /* Builtins for MPX. */
33071 static const struct builtin_description bdesc_mpx[] =
33072 {
33073 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33074 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33075 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
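   /* Editorial note (assumption, not stated by the table itself): the MPX
      descriptors here and in bdesc_mpx_const below carry (enum insn_code) 0
      instead of a CODE_FOR_ value, so they are presumably expanded by
      dedicated cases in ix86_expand_builtin rather than through a named
      insn pattern; their FTYPE names (e.g. VOID_FTYPE_PCVOID_BND_PCVOID
      for "__builtin_ia32_bndstx") still describe the C-level prototypes
      in the usual way.  */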
33076 };
33078 /* Const builtins for MPX. */
33079 static const struct builtin_description bdesc_mpx_const[] =
33080 {
33081 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33082 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33083 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33084 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33085 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33086 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33087 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33088 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33089 };
33091 /* FMA4 and XOP. */
33092 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33093 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33094 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33095 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33096 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33097 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33098 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33099 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33100 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33101 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33102 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33103 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33104 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33105 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33106 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33107 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33108 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33109 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33110 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33111 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33112 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33113 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33114 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33115 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33116 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33117 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33118 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33119 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33120 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33121 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33122 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33123 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33124 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33125 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33126 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33127 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33128 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33129 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33130 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33131 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33132 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33133 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33134 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33135 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33136 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33137 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33138 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33139 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33140 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33141 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33142 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33143 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
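   /* Editorial note (an illustrative sketch): the MULTI_ARG_* names above
      are shorthand for the corresponding FTYPE prototype enums, encoding
      operand count and vector mode; MULTI_ARG_3_SF, for instance, is
      V4SF_FTYPE_V4SF_V4SF_V4SF.  The FMA4 entry below for
      "__builtin_ia32_vfmaddss" therefore describes roughly
        __m128 r = (__m128) __builtin_ia32_vfmaddss ((__v4sf) a, (__v4sf) b,
                                                     (__v4sf) c);
      the form the _mm_macc_ss wrapper in fma4intrin.h is commonly defined
      around.  */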
33145 static const struct builtin_description bdesc_multi_arg[] =
33146 {
33147 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33148 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33149 UNKNOWN, (int)MULTI_ARG_3_SF },
33150 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33151 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33152 UNKNOWN, (int)MULTI_ARG_3_DF },
33154 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33155 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33156 UNKNOWN, (int)MULTI_ARG_3_SF },
33157 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33158 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33159 UNKNOWN, (int)MULTI_ARG_3_DF },
33161 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33162 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33163 UNKNOWN, (int)MULTI_ARG_3_SF },
33164 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33165 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33166 UNKNOWN, (int)MULTI_ARG_3_DF },
33167 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33168 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33169 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33170 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33171 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33172 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33174 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33175 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33176 UNKNOWN, (int)MULTI_ARG_3_SF },
33177 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33178 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33179 UNKNOWN, (int)MULTI_ARG_3_DF },
33180 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33181 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33182 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33183 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33184 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33185 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33210 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33218 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33226 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33242 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33250 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33259 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33268 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33278 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33286 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33288 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
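/* Illustrative note (not part of the original file): each row above pairs an
   ISA mask and insn code with the builtin's user-visible name, its
   IX86_BUILTIN_* enumerator, an optional comparison code, and the
   multi-arg type that later selects the prototype.  A sketch of how one row
   reads, where the V4SI signature is an assumption taken from the
   MULTI_ARG_2_SI tag rather than a definition in this file:

     { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,
       "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD,
       UNKNOWN, (int) MULTI_ARG_2_SI }
     => available under -mxop, expanded via the xop_vrotlv4si3 pattern,
        roughly:  v4si r = __builtin_ia32_vprotd (a, b);  */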
33347 /* TM vector builtins. */
33349 /* Reuse the existing x86-specific `struct builtin_description' because
33350 it is convenient. Add casts to make the TM builtin codes fit. */
33351 static const struct builtin_description bdesc_tm[] =
33353 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33354 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33355 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33356 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33357 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33358 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33359 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33361 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33362 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33363 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33364 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33365 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33366 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33367 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33369 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33370 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33371 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33372 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33373 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33374 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33375 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33377 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33378 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33379 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33382 /* TM callbacks. */
33384 /* Return the builtin decl needed to load a vector of TYPE. */
33386 static tree
33387 ix86_builtin_tm_load (tree type)
33389 if (TREE_CODE (type) == VECTOR_TYPE)
33391 switch (tree_to_uhwi (TYPE_SIZE (type)))
33393 case 64:
33394 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33395 case 128:
33396 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33397 case 256:
33398 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33401 return NULL_TREE;
33404 /* Return the builtin decl needed to store a vector of TYPE. */
33406 static tree
33407 ix86_builtin_tm_store (tree type)
33409 if (TREE_CODE (type) == VECTOR_TYPE)
33411 switch (tree_to_uhwi (TYPE_SIZE (type)))
33413 case 64:
33414 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33415 case 128:
33416 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33417 case 256:
33418 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33421 return NULL_TREE;
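/* Illustrative note (not part of the original file): both helpers above key
   off TYPE_SIZE of the vector type, so a 128-bit vector type selects the
   16-byte TM accessors.  A rough sketch, assuming TYPE is a VECTOR_TYPE of
   size 128 bits:

     tree load_fn  = ix86_builtin_tm_load (type);   /- BUILT_IN_TM_LOAD_M128
     tree store_fn = ix86_builtin_tm_store (type);  /- BUILT_IN_TM_STORE_M128

   Any other size (or a non-vector type) yields NULL_TREE and the caller
   falls back to the generic TM instrumentation.  */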
33424 /* Initialize the transactional memory vector load/store builtins. */
33426 static void
33427 ix86_init_tm_builtins (void)
33429 enum ix86_builtin_func_type ftype;
33430 const struct builtin_description *d;
33431 size_t i;
33432 tree decl;
33433 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33434 tree attrs_log, attrs_type_log;
33436 if (!flag_tm)
33437 return;
33439 /* If there are no builtins defined, we must be compiling in a
33440 language without trans-mem support. */
33441 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33442 return;
33444 /* Use whatever attributes a normal TM load has. */
33445 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33446 attrs_load = DECL_ATTRIBUTES (decl);
33447 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33448 /* Use whatever attributes a normal TM store has. */
33449 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33450 attrs_store = DECL_ATTRIBUTES (decl);
33451 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33452 /* Use whatever attributes a normal TM log has. */
33453 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33454 attrs_log = DECL_ATTRIBUTES (decl);
33455 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33457 for (i = 0, d = bdesc_tm;
33458 i < ARRAY_SIZE (bdesc_tm);
33459 i++, d++)
33461 if ((d->mask & ix86_isa_flags) != 0
33462 || (lang_hooks.builtin_function
33463 == lang_hooks.builtin_function_ext_scope))
33465 tree type, attrs, attrs_type;
33466 enum built_in_function code = (enum built_in_function) d->code;
33468 ftype = (enum ix86_builtin_func_type) d->flag;
33469 type = ix86_get_builtin_func_type (ftype);
33471 if (BUILTIN_TM_LOAD_P (code))
33473 attrs = attrs_load;
33474 attrs_type = attrs_type_load;
33476 else if (BUILTIN_TM_STORE_P (code))
33478 attrs = attrs_store;
33479 attrs_type = attrs_type_store;
33481 else
33483 attrs = attrs_log;
33484 attrs_type = attrs_type_log;
33486 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33487 /* The same name without the "__builtin_" prefix,
33488 used as the library name for calling it directly. */
33489 d->name + strlen ("__builtin_"),
33490 attrs);
33491 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33492 set the TYPE_ATTRIBUTES. */
33493 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33495 set_builtin_decl (code, decl, false);
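/* Illustrative note (not part of the original file): the second name passed
   to add_builtin_function above is d->name with the "__builtin_" prefix
   stripped, and it becomes the library (assembler) name of the decl.  So a
   call such as

     __builtin__ITM_RM128 (p);

   is emitted as a call to the libitm entry point _ITM_RM128 when the TM
   lowering passes leave it as a real call.  Sketch only; the exact
   expansion is decided by the trans-mem machinery, not here.  */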
33500 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33501 not in the current target ISA, so that the user can compile particular
33502 modules with target-specific options that differ from the command-line
33503 options. */
33504 static void
33505 ix86_init_mmx_sse_builtins (void)
33507 const struct builtin_description * d;
33508 enum ix86_builtin_func_type ftype;
33509 size_t i;
33511 /* Add all special builtins with variable number of operands. */
33512 for (i = 0, d = bdesc_special_args;
33513 i < ARRAY_SIZE (bdesc_special_args);
33514 i++, d++)
33516 if (d->name == 0)
33517 continue;
33519 ftype = (enum ix86_builtin_func_type) d->flag;
33520 def_builtin (d->mask, d->name, ftype, d->code);
33523 /* Add all builtins with variable number of operands. */
33524 for (i = 0, d = bdesc_args;
33525 i < ARRAY_SIZE (bdesc_args);
33526 i++, d++)
33528 if (d->name == 0)
33529 continue;
33531 ftype = (enum ix86_builtin_func_type) d->flag;
33532 def_builtin_const (d->mask, d->name, ftype, d->code);
33535 /* Add all builtins with rounding. */
33536 for (i = 0, d = bdesc_round_args;
33537 i < ARRAY_SIZE (bdesc_round_args);
33538 i++, d++)
33540 if (d->name == 0)
33541 continue;
33543 ftype = (enum ix86_builtin_func_type) d->flag;
33544 def_builtin_const (d->mask, d->name, ftype, d->code);
33547 /* pcmpestr[im] insns. */
33548 for (i = 0, d = bdesc_pcmpestr;
33549 i < ARRAY_SIZE (bdesc_pcmpestr);
33550 i++, d++)
33552 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33553 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33554 else
33555 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33556 def_builtin_const (d->mask, d->name, ftype, d->code);
33559 /* pcmpistr[im] insns. */
33560 for (i = 0, d = bdesc_pcmpistr;
33561 i < ARRAY_SIZE (bdesc_pcmpistr);
33562 i++, d++)
33564 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33565 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33566 else
33567 ftype = INT_FTYPE_V16QI_V16QI_INT;
33568 def_builtin_const (d->mask, d->name, ftype, d->code);
33571 /* comi/ucomi insns. */
33572 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33574 if (d->mask == OPTION_MASK_ISA_SSE2)
33575 ftype = INT_FTYPE_V2DF_V2DF;
33576 else
33577 ftype = INT_FTYPE_V4SF_V4SF;
33578 def_builtin_const (d->mask, d->name, ftype, d->code);
33581 /* SSE */
33582 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33583 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33584 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33585 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
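/* Illustrative usage sketch (not part of the original file), derived from
   the prototypes above (UNSIGNED_FTYPE_VOID / VOID_FTYPE_UNSIGNED); the
   0x8000 flush-to-zero bit of MXCSR is an assumption of the example:

     unsigned int mxcsr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (mxcsr | 0x8000);  */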
33587 /* SSE or 3DNow!A */
33588 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33589 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33590 IX86_BUILTIN_MASKMOVQ);
33592 /* SSE2 */
33593 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33594 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33596 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33597 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33598 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33599 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33601 /* SSE3. */
33602 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33603 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33604 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33605 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33607 /* AES */
33608 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33609 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33610 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33611 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33612 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33613 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33614 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33615 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33616 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33617 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33618 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33619 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33621 /* PCLMUL */
33622 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33623 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33625 /* RDRND */
33626 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33627 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33628 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33629 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33630 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33631 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33632 IX86_BUILTIN_RDRAND64_STEP);
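/* Illustrative usage sketch (not part of the original file): per the
   INT_FTYPE_P* prototypes above, the *_step builtins return nonzero on
   success and write the random value through the pointer, so callers
   typically retry on transient failure:

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       ;  */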
33634 /* AVX2 */
33635 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33636 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33637 IX86_BUILTIN_GATHERSIV2DF);
33639 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33640 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33641 IX86_BUILTIN_GATHERSIV4DF);
33643 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33644 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33645 IX86_BUILTIN_GATHERDIV2DF);
33647 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33648 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33649 IX86_BUILTIN_GATHERDIV4DF);
33651 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33652 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33653 IX86_BUILTIN_GATHERSIV4SF);
33655 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33656 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33657 IX86_BUILTIN_GATHERSIV8SF);
33659 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33660 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33661 IX86_BUILTIN_GATHERDIV4SF);
33663 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33664 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33665 IX86_BUILTIN_GATHERDIV8SF);
33667 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33668 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33669 IX86_BUILTIN_GATHERSIV2DI);
33671 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33672 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33673 IX86_BUILTIN_GATHERSIV4DI);
33675 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33676 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33677 IX86_BUILTIN_GATHERDIV2DI);
33679 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33680 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33681 IX86_BUILTIN_GATHERDIV4DI);
33683 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33684 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33685 IX86_BUILTIN_GATHERSIV4SI);
33687 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33688 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33689 IX86_BUILTIN_GATHERSIV8SI);
33691 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33692 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33693 IX86_BUILTIN_GATHERDIV4SI);
33695 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33696 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33697 IX86_BUILTIN_GATHERDIV8SI);
33699 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33700 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33701 IX86_BUILTIN_GATHERALTSIV4DF);
33703 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33704 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33705 IX86_BUILTIN_GATHERALTDIV8SF);
33707 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33708 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33709 IX86_BUILTIN_GATHERALTSIV4DI);
33711 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33712 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33713 IX86_BUILTIN_GATHERALTDIV8SI);
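/* Illustrative note (not part of the original file): the gather prototypes
   above share one argument order, readable from the FTYPE names:
   (src, base pointer, index vector, mask, scale).  A hedged sketch for the
   first entry, assuming the usual v2df/v4si vector typedefs:

     v2df r = __builtin_ia32_gathersiv2df (src,    /- keeps elements whose
                                           base,   /-   mask bit is clear
                                           index,  /- v4si element offsets
                                           mask,   /- per-element mask
                                           8);     /- scale: 1, 2, 4 or 8  */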
33715 /* AVX512F */
33716 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33717 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33718 IX86_BUILTIN_GATHER3SIV16SF);
33720 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33721 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33722 IX86_BUILTIN_GATHER3SIV8DF);
33724 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33725 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33726 IX86_BUILTIN_GATHER3DIV16SF);
33728 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33729 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33730 IX86_BUILTIN_GATHER3DIV8DF);
33732 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33733 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33734 IX86_BUILTIN_GATHER3SIV16SI);
33736 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33737 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33738 IX86_BUILTIN_GATHER3SIV8DI);
33740 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33741 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33742 IX86_BUILTIN_GATHER3DIV16SI);
33744 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33745 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33746 IX86_BUILTIN_GATHER3DIV8DI);
33748 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33749 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33750 IX86_BUILTIN_GATHER3ALTSIV8DF);
33752 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33753 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33754 IX86_BUILTIN_GATHER3ALTDIV16SF);
33756 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33757 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33758 IX86_BUILTIN_GATHER3ALTSIV8DI);
33760 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33761 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33762 IX86_BUILTIN_GATHER3ALTDIV16SI);
33764 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33765 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33766 IX86_BUILTIN_SCATTERSIV16SF);
33768 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33769 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33770 IX86_BUILTIN_SCATTERSIV8DF);
33772 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33773 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33774 IX86_BUILTIN_SCATTERDIV16SF);
33776 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33777 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33778 IX86_BUILTIN_SCATTERDIV8DF);
33780 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33781 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33782 IX86_BUILTIN_SCATTERSIV16SI);
33784 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33785 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33786 IX86_BUILTIN_SCATTERSIV8DI);
33788 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33789 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33790 IX86_BUILTIN_SCATTERDIV16SI);
33792 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33793 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33794 IX86_BUILTIN_SCATTERDIV8DI);
33796 /* AVX512VL */
33797 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33798 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33799 IX86_BUILTIN_GATHER3SIV2DF);
33801 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33802 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33803 IX86_BUILTIN_GATHER3SIV4DF);
33805 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33806 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33807 IX86_BUILTIN_GATHER3DIV2DF);
33809 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33810 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33811 IX86_BUILTIN_GATHER3DIV4DF);
33813 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33814 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33815 IX86_BUILTIN_GATHER3SIV4SF);
33817 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33818 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33819 IX86_BUILTIN_GATHER3SIV8SF);
33821 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33822 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33823 IX86_BUILTIN_GATHER3DIV4SF);
33825 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33826 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33827 IX86_BUILTIN_GATHER3DIV8SF);
33829 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33830 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33831 IX86_BUILTIN_GATHER3SIV2DI);
33833 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33834 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33835 IX86_BUILTIN_GATHER3SIV4DI);
33837 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33838 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33839 IX86_BUILTIN_GATHER3DIV2DI);
33841 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33842 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33843 IX86_BUILTIN_GATHER3DIV4DI);
33845 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33846 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33847 IX86_BUILTIN_GATHER3SIV4SI);
33849 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33850 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33851 IX86_BUILTIN_GATHER3SIV8SI);
33853 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33854 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33855 IX86_BUILTIN_GATHER3DIV4SI);
33857 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33858 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33859 IX86_BUILTIN_GATHER3DIV8SI);
33861 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33862 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33863 IX86_BUILTIN_GATHER3ALTSIV4DF);
33865 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33866 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33867 IX86_BUILTIN_GATHER3ALTDIV8SF);
33869 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33870 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33871 IX86_BUILTIN_GATHER3ALTSIV4DI);
33873 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33874 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33875 IX86_BUILTIN_GATHER3ALTDIV8SI);
33877 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33878 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33879 IX86_BUILTIN_SCATTERSIV8SF);
33881 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33882 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33883 IX86_BUILTIN_SCATTERSIV4SF);
33885 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33886 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33887 IX86_BUILTIN_SCATTERSIV4DF);
33889 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33890 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33891 IX86_BUILTIN_SCATTERSIV2DF);
33893 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33894 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33895 IX86_BUILTIN_SCATTERDIV8SF);
33897 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33898 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33899 IX86_BUILTIN_SCATTERDIV4SF);
33901 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33902 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33903 IX86_BUILTIN_SCATTERDIV4DF);
33905 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33906 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33907 IX86_BUILTIN_SCATTERDIV2DF);
33909 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33910 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33911 IX86_BUILTIN_SCATTERSIV8SI);
33913 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33914 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33915 IX86_BUILTIN_SCATTERSIV4SI);
33917 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33918 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33919 IX86_BUILTIN_SCATTERSIV4DI);
33921 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33922 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33923 IX86_BUILTIN_SCATTERSIV2DI);
33925 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33926 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33927 IX86_BUILTIN_SCATTERDIV8SI);
33929 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33930 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33931 IX86_BUILTIN_SCATTERDIV4SI);
33933 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33934 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33935 IX86_BUILTIN_SCATTERDIV4DI);
33937 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33938 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33939 IX86_BUILTIN_SCATTERDIV2DI);
33941 /* AVX512PF */
33942 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33943 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33944 IX86_BUILTIN_GATHERPFDPD);
33945 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33946 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33947 IX86_BUILTIN_GATHERPFDPS);
33948 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33949 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33950 IX86_BUILTIN_GATHERPFQPD);
33951 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33952 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33953 IX86_BUILTIN_GATHERPFQPS);
33954 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33955 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33956 IX86_BUILTIN_SCATTERPFDPD);
33957 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33958 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33959 IX86_BUILTIN_SCATTERPFDPS);
33960 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33961 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33962 IX86_BUILTIN_SCATTERPFQPD);
33963 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33964 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33965 IX86_BUILTIN_SCATTERPFQPS);
33967 /* SHA */
33968 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33969 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33970 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33971 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33972 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33973 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33974 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33975 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33976 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33977 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33978 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33979 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33980 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33981 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33983 /* RTM. */
33984 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33985 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33987 /* MMX access to the vec_init patterns. */
33988 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33989 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33991 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33992 V4HI_FTYPE_HI_HI_HI_HI,
33993 IX86_BUILTIN_VEC_INIT_V4HI);
33995 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33996 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33997 IX86_BUILTIN_VEC_INIT_V8QI);
33999 /* Access to the vec_extract patterns. */
34000 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34001 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34002 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34003 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34004 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34005 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34006 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34007 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34008 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34009 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34011 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34012 "__builtin_ia32_vec_ext_v4hi",
34013 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34015 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34016 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34018 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34019 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34021 /* Access to the vec_set patterns. */
34022 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34023 "__builtin_ia32_vec_set_v2di",
34024 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34026 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34027 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34029 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34030 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34032 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34033 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34035 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34036 "__builtin_ia32_vec_set_v4hi",
34037 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34039 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34040 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34042 /* RDSEED */
34043 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34044 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34045 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34046 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34047 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34048 "__builtin_ia32_rdseed_di_step",
34049 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34051 /* ADCX */
34052 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34053 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34054 def_builtin (OPTION_MASK_ISA_64BIT,
34055 "__builtin_ia32_addcarryx_u64",
34056 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34057 IX86_BUILTIN_ADDCARRYX64);
34059 /* SBB */
34060 def_builtin (0, "__builtin_ia32_sbb_u32",
34061 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34062 def_builtin (OPTION_MASK_ISA_64BIT,
34063 "__builtin_ia32_sbb_u64",
34064 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34065 IX86_BUILTIN_SBB64);
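/* Illustrative usage sketch (not part of the original file): per the
   UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED prototype above, the add-with-carry
   builtins take a carry-in, two operands and an output pointer, and return
   the carry-out, which makes multi-word addition straightforward:

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);

   The SBB variants follow the same shape with a borrow instead of a carry.  */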
34067 /* Read/write FLAGS. */
34068 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34069 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34070 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34071 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34072 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34073 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34074 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34075 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
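/* Illustrative usage sketch (not part of the original file): the 32-bit
   variants follow UNSIGNED_FTYPE_VOID / VOID_FTYPE_UNSIGNED above, so the
   flags register can be saved and restored around code that clobbers it:

     unsigned int flags = __builtin_ia32_readeflags_u32 ();
     /- ... code that may change the arithmetic flags ...
     __builtin_ia32_writeeflags_u32 (flags);  */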
34077 /* CLFLUSHOPT. */
34078 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34079 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34081 /* CLWB. */
34082 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34083 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34085 /* Add FMA4 and XOP multi-argument instructions. */
34086 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34088 if (d->name == 0)
34089 continue;
34091 ftype = (enum ix86_builtin_func_type) d->flag;
34092 def_builtin_const (d->mask, d->name, ftype, d->code);
34096 static void
34097 ix86_init_mpx_builtins ()
34099 const struct builtin_description * d;
34100 enum ix86_builtin_func_type ftype;
34101 tree decl;
34102 size_t i;
34104 for (i = 0, d = bdesc_mpx;
34105 i < ARRAY_SIZE (bdesc_mpx);
34106 i++, d++)
34108 if (d->name == 0)
34109 continue;
34111 ftype = (enum ix86_builtin_func_type) d->flag;
34112 decl = def_builtin (d->mask, d->name, ftype, d->code);
34114 /* Without the leaf and nothrow flags for MPX builtins,
34115 abnormal edges may follow their calls when setjmp
34116 is present in the function. Since we may have many
34117 MPX builtin calls, this causes lots of useless
34118 edges and enormous PHI nodes. To avoid this we mark
34119 MPX builtins as leaf and nothrow. */
34120 if (decl)
34122 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34123 NULL_TREE);
34124 TREE_NOTHROW (decl) = 1;
34126 else
34128 ix86_builtins_isa[(int)d->code].leaf_p = true;
34129 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34133 for (i = 0, d = bdesc_mpx_const;
34134 i < ARRAY_SIZE (bdesc_mpx_const);
34135 i++, d++)
34137 if (d->name == 0)
34138 continue;
34140 ftype = (enum ix86_builtin_func_type) d->flag;
34141 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34143 if (decl)
34145 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34146 NULL_TREE);
34147 TREE_NOTHROW (decl) = 1;
34149 else
34151 ix86_builtins_isa[(int)d->code].leaf_p = true;
34152 ix86_builtins_isa[(int)d->code].nothrow_p = true;
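/* Illustrative note (not part of the original file): the effect of setting
   the "leaf" attribute and TREE_NOTHROW here is roughly what an explicit
   declaration would give (the name below is hypothetical, not a real MPX
   builtin):

     extern void *__example_mpx_builtin (void *)
       __attribute__ ((leaf, nothrow));

   so no abnormal setjmp-related edges are created after such calls.  */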
34157 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34158 to return a pointer to VERSION_DECL if the outcome of the expression
34159 formed by PREDICATE_CHAIN is true. This function will be called during
34160 version dispatch to decide which function version to execute. It returns
34161 the basic block at the end, to which more conditions can be added. */
34163 static basic_block
34164 add_condition_to_bb (tree function_decl, tree version_decl,
34165 tree predicate_chain, basic_block new_bb)
34167 gimple return_stmt;
34168 tree convert_expr, result_var;
34169 gimple convert_stmt;
34170 gimple call_cond_stmt;
34171 gimple if_else_stmt;
34173 basic_block bb1, bb2, bb3;
34174 edge e12, e23;
34176 tree cond_var, and_expr_var = NULL_TREE;
34177 gimple_seq gseq;
34179 tree predicate_decl, predicate_arg;
34181 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34183 gcc_assert (new_bb != NULL);
34184 gseq = bb_seq (new_bb);
34187 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34188 build_fold_addr_expr (version_decl));
34189 result_var = create_tmp_var (ptr_type_node);
34190 convert_stmt = gimple_build_assign (result_var, convert_expr);
34191 return_stmt = gimple_build_return (result_var);
34193 if (predicate_chain == NULL_TREE)
34195 gimple_seq_add_stmt (&gseq, convert_stmt);
34196 gimple_seq_add_stmt (&gseq, return_stmt);
34197 set_bb_seq (new_bb, gseq);
34198 gimple_set_bb (convert_stmt, new_bb);
34199 gimple_set_bb (return_stmt, new_bb);
34200 pop_cfun ();
34201 return new_bb;
34204 while (predicate_chain != NULL)
34206 cond_var = create_tmp_var (integer_type_node);
34207 predicate_decl = TREE_PURPOSE (predicate_chain);
34208 predicate_arg = TREE_VALUE (predicate_chain);
34209 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34210 gimple_call_set_lhs (call_cond_stmt, cond_var);
34212 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34213 gimple_set_bb (call_cond_stmt, new_bb);
34214 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34216 predicate_chain = TREE_CHAIN (predicate_chain);
34218 if (and_expr_var == NULL)
34219 and_expr_var = cond_var;
34220 else
34222 gimple assign_stmt;
34223 /* Use MIN_EXPR to check whether any of the integers is zero:
34224 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
34225 assign_stmt = gimple_build_assign (and_expr_var,
34226 build2 (MIN_EXPR, integer_type_node,
34227 cond_var, and_expr_var));
34229 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34230 gimple_set_bb (assign_stmt, new_bb);
34231 gimple_seq_add_stmt (&gseq, assign_stmt);
34235 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34236 integer_zero_node,
34237 NULL_TREE, NULL_TREE);
34238 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34239 gimple_set_bb (if_else_stmt, new_bb);
34240 gimple_seq_add_stmt (&gseq, if_else_stmt);
34242 gimple_seq_add_stmt (&gseq, convert_stmt);
34243 gimple_seq_add_stmt (&gseq, return_stmt);
34244 set_bb_seq (new_bb, gseq);
34246 bb1 = new_bb;
34247 e12 = split_block (bb1, if_else_stmt);
34248 bb2 = e12->dest;
34249 e12->flags &= ~EDGE_FALLTHRU;
34250 e12->flags |= EDGE_TRUE_VALUE;
34252 e23 = split_block (bb2, return_stmt);
34254 gimple_set_bb (convert_stmt, bb2);
34255 gimple_set_bb (return_stmt, bb2);
34257 bb3 = e23->dest;
34258 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34260 remove_edge (e23);
34261 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34263 pop_cfun ();
34265 return bb3;
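/* Illustrative note (not part of the original file): for a two-entry
   predicate chain, the function above appends GIMPLE of roughly this shape
   to NEW_BB (names are pseudocode, not real identifiers):

     cond_1 = pred_1 (arg_1);
     cond_2 = pred_2 (arg_2);
     and_tmp = MIN_EXPR <cond_2, cond_1>;
     if (and_tmp > 0)
       { result = (void *) &version_decl;  return result; }

   The false edge falls through to the returned block, where the next
   version's condition can be appended.  */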
34268 /* This parses the attribute arguments to target in DECL and determines
34269 the right builtin to use to match the platform specification.
34270 It returns the priority value for this version decl. If PREDICATE_LIST
34271 is not NULL, it stores the list of cpu features that need to be checked
34272 before dispatching this function. */
34274 static unsigned int
34275 get_builtin_code_for_version (tree decl, tree *predicate_list)
34277 tree attrs;
34278 struct cl_target_option cur_target;
34279 tree target_node;
34280 struct cl_target_option *new_target;
34281 const char *arg_str = NULL;
34282 const char *attrs_str = NULL;
34283 char *tok_str = NULL;
34284 char *token;
34286 /* Priority of i386 features, greater value is higher priority. This is
34287 used to decide the order in which function dispatch must happen. For
34288 instance, a version specialized for SSE4.2 should be checked for dispatch
34289 before a version for SSE3, as SSE4.2 implies SSE3. */
34290 enum feature_priority
34292 P_ZERO = 0,
34293 P_MMX,
34294 P_SSE,
34295 P_SSE2,
34296 P_SSE3,
34297 P_SSSE3,
34298 P_PROC_SSSE3,
34299 P_SSE4_A,
34300 P_PROC_SSE4_A,
34301 P_SSE4_1,
34302 P_SSE4_2,
34303 P_PROC_SSE4_2,
34304 P_POPCNT,
34305 P_AVX,
34306 P_PROC_AVX,
34307 P_BMI,
34308 P_PROC_BMI,
34309 P_FMA4,
34310 P_XOP,
34311 P_PROC_XOP,
34312 P_FMA,
34313 P_PROC_FMA,
34314 P_BMI2,
34315 P_AVX2,
34316 P_PROC_AVX2,
34317 P_AVX512F,
34318 P_PROC_AVX512F
34321 enum feature_priority priority = P_ZERO;
34323 /* These are the target attribute strings for which a dispatcher is
34324 available, from fold_builtin_cpu. */
34326 static struct _feature_list
34328 const char *const name;
34329 const enum feature_priority priority;
34331 const feature_list[] =
34333 {"mmx", P_MMX},
34334 {"sse", P_SSE},
34335 {"sse2", P_SSE2},
34336 {"sse3", P_SSE3},
34337 {"sse4a", P_SSE4_A},
34338 {"ssse3", P_SSSE3},
34339 {"sse4.1", P_SSE4_1},
34340 {"sse4.2", P_SSE4_2},
34341 {"popcnt", P_POPCNT},
34342 {"avx", P_AVX},
34343 {"bmi", P_BMI},
34344 {"fma4", P_FMA4},
34345 {"xop", P_XOP},
34346 {"fma", P_FMA},
34347 {"bmi2", P_BMI2},
34348 {"avx2", P_AVX2},
34349 {"avx512f", P_AVX512F}
34353 static unsigned int NUM_FEATURES
34354 = sizeof (feature_list) / sizeof (struct _feature_list);
34356 unsigned int i;
34358 tree predicate_chain = NULL_TREE;
34359 tree predicate_decl, predicate_arg;
34361 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34362 gcc_assert (attrs != NULL);
34364 attrs = TREE_VALUE (TREE_VALUE (attrs));
34366 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34367 attrs_str = TREE_STRING_POINTER (attrs);
34369 /* Return priority zero for default function. */
34370 if (strcmp (attrs_str, "default") == 0)
34371 return 0;
34373 /* Handle arch= if specified. For priority, set it to be 1 more than
34374 the best instruction set the processor can handle. For instance, if
34375 there is a version for atom and a version for ssse3 (the highest ISA
34376 priority for atom), the atom version must be checked for dispatch
34377 before the ssse3 version. */
34378 if (strstr (attrs_str, "arch=") != NULL)
34380 cl_target_option_save (&cur_target, &global_options);
34381 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34382 &global_options_set);
34384 gcc_assert (target_node);
34385 new_target = TREE_TARGET_OPTION (target_node);
34386 gcc_assert (new_target);
34388 if (new_target->arch_specified && new_target->arch > 0)
34390 switch (new_target->arch)
34392 case PROCESSOR_CORE2:
34393 arg_str = "core2";
34394 priority = P_PROC_SSSE3;
34395 break;
34396 case PROCESSOR_NEHALEM:
34397 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34398 arg_str = "westmere";
34399 else
34400 /* We translate "arch=corei7" and "arch=nehalem" to
34401 "corei7" so that it will be mapped to M_INTEL_COREI7
34402 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34403 arg_str = "corei7";
34404 priority = P_PROC_SSE4_2;
34405 break;
34406 case PROCESSOR_SANDYBRIDGE:
34407 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34408 arg_str = "ivybridge";
34409 else
34410 arg_str = "sandybridge";
34411 priority = P_PROC_AVX;
34412 break;
34413 case PROCESSOR_HASWELL:
34414 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34415 arg_str = "broadwell";
34416 else
34417 arg_str = "haswell";
34418 priority = P_PROC_AVX2;
34419 break;
34420 case PROCESSOR_BONNELL:
34421 arg_str = "bonnell";
34422 priority = P_PROC_SSSE3;
34423 break;
34424 case PROCESSOR_KNL:
34425 arg_str = "knl";
34426 priority = P_PROC_AVX512F;
34427 break;
34428 case PROCESSOR_SILVERMONT:
34429 arg_str = "silvermont";
34430 priority = P_PROC_SSE4_2;
34431 break;
34432 case PROCESSOR_AMDFAM10:
34433 arg_str = "amdfam10h";
34434 priority = P_PROC_SSE4_A;
34435 break;
34436 case PROCESSOR_BTVER1:
34437 arg_str = "btver1";
34438 priority = P_PROC_SSE4_A;
34439 break;
34440 case PROCESSOR_BTVER2:
34441 arg_str = "btver2";
34442 priority = P_PROC_BMI;
34443 break;
34444 case PROCESSOR_BDVER1:
34445 arg_str = "bdver1";
34446 priority = P_PROC_XOP;
34447 break;
34448 case PROCESSOR_BDVER2:
34449 arg_str = "bdver2";
34450 priority = P_PROC_FMA;
34451 break;
34452 case PROCESSOR_BDVER3:
34453 arg_str = "bdver3";
34454 priority = P_PROC_FMA;
34455 break;
34456 case PROCESSOR_BDVER4:
34457 arg_str = "bdver4";
34458 priority = P_PROC_AVX2;
34459 break;
34463 cl_target_option_restore (&global_options, &cur_target);
34465 if (predicate_list && arg_str == NULL)
34467 error_at (DECL_SOURCE_LOCATION (decl),
34468 "No dispatcher found for the versioning attributes");
34469 return 0;
34472 if (predicate_list)
34474 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34475 /* For a C string literal the length includes the terminating NUL. */
34476 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34477 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34478 predicate_chain);
34482 /* Process feature name. */
34483 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34484 strcpy (tok_str, attrs_str);
34485 token = strtok (tok_str, ",");
34486 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34488 while (token != NULL)
34490 /* Do not process "arch=" */
34491 if (strncmp (token, "arch=", 5) == 0)
34493 token = strtok (NULL, ",");
34494 continue;
34496 for (i = 0; i < NUM_FEATURES; ++i)
34498 if (strcmp (token, feature_list[i].name) == 0)
34500 if (predicate_list)
34502 predicate_arg = build_string_literal (
34503 strlen (feature_list[i].name) + 1,
34504 feature_list[i].name);
34505 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34506 predicate_chain);
34508 /* Find the maximum priority feature. */
34509 if (feature_list[i].priority > priority)
34510 priority = feature_list[i].priority;
34512 break;
34515 if (predicate_list && i == NUM_FEATURES)
34517 error_at (DECL_SOURCE_LOCATION (decl),
34518 "No dispatcher found for %s", token);
34519 return 0;
34521 token = strtok (NULL, ",");
34523 free (tok_str);
34525 if (predicate_list && predicate_chain == NULL_TREE)
34527 error_at (DECL_SOURCE_LOCATION (decl),
34528 "No dispatcher found for the versioning attributes : %s",
34529 attrs_str);
34530 return 0;
34532 else if (predicate_list)
34534 predicate_chain = nreverse (predicate_chain);
34535 *predicate_list = predicate_chain;
34538 return priority;
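/* As a sketch of the input handled above (user-level code, not part of
   this file), function versions are declared like

     __attribute__ ((target ("default")))     int foo (void);
     __attribute__ ((target ("arch=core2")))  int foo (void);
     __attribute__ ((target ("avx2,popcnt"))) int foo (void);

   The "arch=core2" version is guarded by __builtin_cpu_is ("core2"),
   while the "avx2,popcnt" version gets a predicate chain equivalent to
   __builtin_cpu_supports ("avx2") && __builtin_cpu_supports ("popcnt").
   The returned priority is the highest priority among the named arch and
   features, so more specific versions are dispatched first.  */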
34541 /* This compares the priority of target features in function DECL1
34542 and DECL2. It returns positive value if DECL1 is higher priority,
34543 negative value if DECL2 is higher priority and 0 if they are the
34544 same. */
34546 static int
34547 ix86_compare_version_priority (tree decl1, tree decl2)
34549 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34550 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34552 return (int)priority1 - (int)priority2;
34555 /* V1 and V2 point to function versions with different priorities
34556 based on the target ISA. This function compares their priorities. */
34558 static int
34559 feature_compare (const void *v1, const void *v2)
34561 typedef struct _function_version_info
34563 tree version_decl;
34564 tree predicate_chain;
34565 unsigned int dispatch_priority;
34566 } function_version_info;
34568 const function_version_info c1 = *(const function_version_info *)v1;
34569 const function_version_info c2 = *(const function_version_info *)v2;
34570 return (c2.dispatch_priority - c1.dispatch_priority);
34573 /* This function generates the dispatch function for
34574 multi-versioned functions. DISPATCH_DECL is the function which will
34575 contain the dispatch logic. FNDECLS are the function choices for
34576 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34577 in DISPATCH_DECL in which the dispatch code is generated. */
34579 static int
34580 dispatch_function_versions (tree dispatch_decl,
34581 void *fndecls_p,
34582 basic_block *empty_bb)
34584 tree default_decl;
34585 gimple ifunc_cpu_init_stmt;
34586 gimple_seq gseq;
34587 int ix;
34588 tree ele;
34589 vec<tree> *fndecls;
34590 unsigned int num_versions = 0;
34591 unsigned int actual_versions = 0;
34592 unsigned int i;
34594 struct _function_version_info
34596 tree version_decl;
34597 tree predicate_chain;
34598 unsigned int dispatch_priority;
34599 }*function_version_info;
34601 gcc_assert (dispatch_decl != NULL
34602 && fndecls_p != NULL
34603 && empty_bb != NULL);
34605 /* fndecls_p is actually a vector. */
34606 fndecls = static_cast<vec<tree> *> (fndecls_p);
34608 /* At least one more version other than the default. */
34609 num_versions = fndecls->length ();
34610 gcc_assert (num_versions >= 2);
34612 function_version_info = (struct _function_version_info *)
34613 XNEWVEC (struct _function_version_info, (num_versions - 1));
34615 /* The first version in the vector is the default decl. */
34616 default_decl = (*fndecls)[0];
34618 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34620 gseq = bb_seq (*empty_bb);
34621 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34622 constructors, so explicitly call __builtin_cpu_init here. */
34623 ifunc_cpu_init_stmt = gimple_build_call_vec (
34624 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34625 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34626 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34627 set_bb_seq (*empty_bb, gseq);
34629 pop_cfun ();
34632 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34634 tree version_decl = ele;
34635 tree predicate_chain = NULL_TREE;
34636 unsigned int priority;
34637 /* Get attribute string, parse it and find the right predicate decl.
34638 The predicate function could be a lengthy combination of many
34639 features, like arch-type and various isa-variants. */
34640 priority = get_builtin_code_for_version (version_decl,
34641 &predicate_chain);
34643 if (predicate_chain == NULL_TREE)
34644 continue;
34646 function_version_info [actual_versions].version_decl = version_decl;
34647 function_version_info [actual_versions].predicate_chain
34648 = predicate_chain;
34649 function_version_info [actual_versions].dispatch_priority = priority;
34650 actual_versions++;
34653 /* Sort the versions according to descending order of dispatch priority. The
34654 priority is based on the ISA. This is not a perfect solution. There
34655 could still be ambiguity. If more than one function version is suitable
34656 to execute, which one should be dispatched? In the future, allow the user
34657 to specify a dispatch priority next to the version. */
34658 qsort (function_version_info, actual_versions,
34659 sizeof (struct _function_version_info), feature_compare);
34661 for (i = 0; i < actual_versions; ++i)
34662 *empty_bb = add_condition_to_bb (dispatch_decl,
34663 function_version_info[i].version_decl,
34664 function_version_info[i].predicate_chain,
34665 *empty_bb);
34667 /* Dispatch the default version at the end. */
34668 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34669 NULL, *empty_bb);
34671 free (function_version_info);
34672 return 0;
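/* Assuming versions of foo for "avx2", "arch=core2" and "default", the
   GIMPLE built into EMPTY_BB behaves roughly like this hand-written
   resolver (a sketch only):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       return foo.avx2;
     if (__builtin_cpu_is ("core2"))
       return foo.arch_core2;
     return foo;          <- default version, dispatched last

   Because of the qsort above, versions are tested in decreasing dispatch
   priority, so the AVX2 body is preferred over the core2 one whenever
   both predicates hold.  */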
34675 /* Comparator function to be used in qsort routine to sort attribute
34676 specification strings to "target". */
34678 static int
34679 attr_strcmp (const void *v1, const void *v2)
34681 const char *c1 = *(char *const*)v1;
34682 const char *c2 = *(char *const*)v2;
34683 return strcmp (c1, c2);
34686 /* ARGLIST is the argument to target attribute. This function tokenizes
34687 the comma separated arguments, sorts them and returns a string which
34688 is a unique identifier for the comma separated arguments. It also
34689 replaces non-identifier characters "=,-" with "_". */
34691 static char *
34692 sorted_attr_string (tree arglist)
34694 tree arg;
34695 size_t str_len_sum = 0;
34696 char **args = NULL;
34697 char *attr_str, *ret_str;
34698 char *attr = NULL;
34699 unsigned int argnum = 1;
34700 unsigned int i;
34702 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34704 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34705 size_t len = strlen (str);
34706 str_len_sum += len + 1;
34707 if (arg != arglist)
34708 argnum++;
34709 for (i = 0; i < strlen (str); i++)
34710 if (str[i] == ',')
34711 argnum++;
34714 attr_str = XNEWVEC (char, str_len_sum);
34715 str_len_sum = 0;
34716 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34718 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34719 size_t len = strlen (str);
34720 memcpy (attr_str + str_len_sum, str, len);
34721 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34722 str_len_sum += len + 1;
34725 /* Replace "=,-" with "_". */
34726 for (i = 0; i < strlen (attr_str); i++)
34727 if (attr_str[i] == '=' || attr_str[i]== '-')
34728 attr_str[i] = '_';
34730 if (argnum == 1)
34731 return attr_str;
34733 args = XNEWVEC (char *, argnum);
34735 i = 0;
34736 attr = strtok (attr_str, ",");
34737 while (attr != NULL)
34739 args[i] = attr;
34740 i++;
34741 attr = strtok (NULL, ",");
34744 qsort (args, argnum, sizeof (char *), attr_strcmp);
34746 ret_str = XNEWVEC (char, str_len_sum);
34747 str_len_sum = 0;
34748 for (i = 0; i < argnum; i++)
34750 size_t len = strlen (args[i]);
34751 memcpy (ret_str + str_len_sum, args[i], len);
34752 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34753 str_len_sum += len + 1;
34756 XDELETEVEC (args);
34757 XDELETEVEC (attr_str);
34758 return ret_str;
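/* A worked example: the attribute arguments "avx2,arch=core2" become the
   string "avx2,arch_core2" after '=' and '-' are rewritten to '_'; the
   two tokens are then sorted and re-joined with '_', giving
   "arch_core2_avx2" regardless of the order in which the user wrote
   them.  */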
34761 /* This function changes the assembler name for functions that are
34762 versions. If DECL is a function version and has a "target"
34763 attribute, it appends the attribute string to its assembler name. */
34765 static tree
34766 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34768 tree version_attr;
34769 const char *orig_name, *version_string;
34770 char *attr_str, *assembler_name;
34772 if (DECL_DECLARED_INLINE_P (decl)
34773 && lookup_attribute ("gnu_inline",
34774 DECL_ATTRIBUTES (decl)))
34775 error_at (DECL_SOURCE_LOCATION (decl),
34776 "Function versions cannot be marked as gnu_inline,"
34777 " bodies have to be generated");
34779 if (DECL_VIRTUAL_P (decl)
34780 || DECL_VINDEX (decl))
34781 sorry ("Virtual function multiversioning not supported");
34783 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34785 /* target attribute string cannot be NULL. */
34786 gcc_assert (version_attr != NULL_TREE);
34788 orig_name = IDENTIFIER_POINTER (id);
34789 version_string
34790 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34792 if (strcmp (version_string, "default") == 0)
34793 return id;
34795 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34796 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34798 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34800 /* Allow assembler name to be modified if already set. */
34801 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34802 SET_DECL_RTL (decl, NULL);
34804 tree ret = get_identifier (assembler_name);
34805 XDELETEVEC (attr_str);
34806 XDELETEVEC (assembler_name);
34807 return ret;
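/* For example, a C function foo declared with
   __attribute__ ((target ("arch=core2"))) gets the assembler name
   "foo.arch_core2" here, while the "default" version keeps plain "foo".
   For C++ the suffix lands on the mangled name, since this hook operates
   on DECL_ASSEMBLER_NAME.  */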
34810 /* This function returns true if FN1 and FN2 are versions of the same function,
34811 that is, the target strings of the function decls are different. This assumes
34812 that FN1 and FN2 have the same signature. */
34814 static bool
34815 ix86_function_versions (tree fn1, tree fn2)
34817 tree attr1, attr2;
34818 char *target1, *target2;
34819 bool result;
34821 if (TREE_CODE (fn1) != FUNCTION_DECL
34822 || TREE_CODE (fn2) != FUNCTION_DECL)
34823 return false;
34825 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34826 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34828 /* At least one function decl should have the target attribute specified. */
34829 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34830 return false;
34832 /* Diagnose missing target attribute if one of the decls is already
34833 multi-versioned. */
34834 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34836 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34838 if (attr2 != NULL_TREE)
34840 tree tem = fn1;
34841 fn1 = fn2;
34842 fn2 = tem;
34843 attr1 = attr2;
34845 error_at (DECL_SOURCE_LOCATION (fn2),
34846 "missing %<target%> attribute for multi-versioned %D",
34847 fn2);
34848 inform (DECL_SOURCE_LOCATION (fn1),
34849 "previous declaration of %D", fn1);
34850 /* Prevent diagnosing of the same error multiple times. */
34851 DECL_ATTRIBUTES (fn2)
34852 = tree_cons (get_identifier ("target"),
34853 copy_node (TREE_VALUE (attr1)),
34854 DECL_ATTRIBUTES (fn2));
34856 return false;
34859 target1 = sorted_attr_string (TREE_VALUE (attr1));
34860 target2 = sorted_attr_string (TREE_VALUE (attr2));
34862 /* The sorted target strings must be different for fn1 and fn2
34863 to be versions. */
34864 if (strcmp (target1, target2) == 0)
34865 result = false;
34866 else
34867 result = true;
34869 XDELETEVEC (target1);
34870 XDELETEVEC (target2);
34872 return result;
34875 static tree
34876 ix86_mangle_decl_assembler_name (tree decl, tree id)
34878 /* For function version, add the target suffix to the assembler name. */
34879 if (TREE_CODE (decl) == FUNCTION_DECL
34880 && DECL_FUNCTION_VERSIONED (decl))
34881 id = ix86_mangle_function_version_assembler_name (decl, id);
34882 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34883 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34884 #endif
34886 return id;
34889 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34890 is true, append the full path name of the source file. */
34892 static char *
34893 make_name (tree decl, const char *suffix, bool make_unique)
34895 char *global_var_name;
34896 int name_len;
34897 const char *name;
34898 const char *unique_name = NULL;
34900 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34902 /* Get a unique name that can be used globally without any chances
34903 of collision at link time. */
34904 if (make_unique)
34905 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34907 name_len = strlen (name) + strlen (suffix) + 2;
34909 if (make_unique)
34910 name_len += strlen (unique_name) + 1;
34911 global_var_name = XNEWVEC (char, name_len);
34913 /* Use '.' to concatenate names as it is demangler friendly. */
34914 if (make_unique)
34915 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34916 suffix);
34917 else
34918 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34920 return global_var_name;
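/* E.g. make_name (foo_decl, "ifunc", false) yields "foo.ifunc"; with
   MAKE_UNIQUE true the translation-unit-specific string returned by
   get_file_function_name is spliced in between, giving
   "foo.<unique>.ifunc", so resolvers for local functions in different
   units cannot clash at link time.  */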
34923 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34925 /* Make a dispatcher declaration for the multi-versioned function DECL.
34926 Calls to DECL will be replaced with calls to the dispatcher
34927 by the front-end. Return the decl created. */
34929 static tree
34930 make_dispatcher_decl (const tree decl)
34932 tree func_decl;
34933 char *func_name;
34934 tree fn_type, func_type;
34935 bool is_uniq = false;
34937 if (TREE_PUBLIC (decl) == 0)
34938 is_uniq = true;
34940 func_name = make_name (decl, "ifunc", is_uniq);
34942 fn_type = TREE_TYPE (decl);
34943 func_type = build_function_type (TREE_TYPE (fn_type),
34944 TYPE_ARG_TYPES (fn_type));
34946 func_decl = build_fn_decl (func_name, func_type);
34947 XDELETEVEC (func_name);
34948 TREE_USED (func_decl) = 1;
34949 DECL_CONTEXT (func_decl) = NULL_TREE;
34950 DECL_INITIAL (func_decl) = error_mark_node;
34951 DECL_ARTIFICIAL (func_decl) = 1;
34952 /* Mark this function as external; the resolver will flip it again if
34953 it gets generated. */
34954 DECL_EXTERNAL (func_decl) = 1;
34955 /* This will be an IFUNC; IFUNCs have to be externally visible. */
34956 TREE_PUBLIC (func_decl) = 1;
34958 return func_decl;
34961 #endif
34963 /* Returns true if DECL is multi-versioned and is the default function,
34964 that is, it is not tagged with a target-specific optimization. */
34966 static bool
34967 is_function_default_version (const tree decl)
34969 if (TREE_CODE (decl) != FUNCTION_DECL
34970 || !DECL_FUNCTION_VERSIONED (decl))
34971 return false;
34972 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34973 gcc_assert (attr);
34974 attr = TREE_VALUE (TREE_VALUE (attr));
34975 return (TREE_CODE (attr) == STRING_CST
34976 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34979 /* Make a dispatcher declaration for the multi-versioned function DECL.
34980 Calls to DECL will be replaced with calls to the dispatcher
34981 by the front-end. Returns the decl of the dispatcher function. */
34983 static tree
34984 ix86_get_function_versions_dispatcher (void *decl)
34986 tree fn = (tree) decl;
34987 struct cgraph_node *node = NULL;
34988 struct cgraph_node *default_node = NULL;
34989 struct cgraph_function_version_info *node_v = NULL;
34990 struct cgraph_function_version_info *first_v = NULL;
34992 tree dispatch_decl = NULL;
34994 struct cgraph_function_version_info *default_version_info = NULL;
34996 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34998 node = cgraph_node::get (fn);
34999 gcc_assert (node != NULL);
35001 node_v = node->function_version ();
35002 gcc_assert (node_v != NULL);
35004 if (node_v->dispatcher_resolver != NULL)
35005 return node_v->dispatcher_resolver;
35007 /* Find the default version and make it the first node. */
35008 first_v = node_v;
35009 /* Go to the beginning of the chain. */
35010 while (first_v->prev != NULL)
35011 first_v = first_v->prev;
35012 default_version_info = first_v;
35013 while (default_version_info != NULL)
35015 if (is_function_default_version
35016 (default_version_info->this_node->decl))
35017 break;
35018 default_version_info = default_version_info->next;
35021 /* If there is no default node, just return NULL. */
35022 if (default_version_info == NULL)
35023 return NULL;
35025 /* Make default info the first node. */
35026 if (first_v != default_version_info)
35028 default_version_info->prev->next = default_version_info->next;
35029 if (default_version_info->next)
35030 default_version_info->next->prev = default_version_info->prev;
35031 first_v->prev = default_version_info;
35032 default_version_info->next = first_v;
35033 default_version_info->prev = NULL;
35036 default_node = default_version_info->this_node;
35038 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35039 if (targetm.has_ifunc_p ())
35041 struct cgraph_function_version_info *it_v = NULL;
35042 struct cgraph_node *dispatcher_node = NULL;
35043 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35045 /* Right now, the dispatching is done via ifunc. */
35046 dispatch_decl = make_dispatcher_decl (default_node->decl);
35048 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35049 gcc_assert (dispatcher_node != NULL);
35050 dispatcher_node->dispatcher_function = 1;
35051 dispatcher_version_info
35052 = dispatcher_node->insert_new_function_version ();
35053 dispatcher_version_info->next = default_version_info;
35054 dispatcher_node->definition = 1;
35056 /* Set the dispatcher for all the versions. */
35057 it_v = default_version_info;
35058 while (it_v != NULL)
35060 it_v->dispatcher_resolver = dispatch_decl;
35061 it_v = it_v->next;
35064 else
35065 #endif
35067 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35068 "multiversioning needs ifunc which is not supported "
35069 "on this target");
35072 return dispatch_decl;
35075 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35076 it to CHAIN. */
35078 static tree
35079 make_attribute (const char *name, const char *arg_name, tree chain)
35081 tree attr_name;
35082 tree attr_arg_name;
35083 tree attr_args;
35084 tree attr;
35086 attr_name = get_identifier (name);
35087 attr_arg_name = build_string (strlen (arg_name), arg_name);
35088 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35089 attr = tree_cons (attr_name, attr_args, chain);
35090 return attr;
35093 /* Make the resolver function decl to dispatch the versions of
35094 a multi-versioned function, DEFAULT_DECL. Create an
35095 empty basic block in the resolver and store the pointer in
35096 EMPTY_BB. Return the decl of the resolver function. */
35098 static tree
35099 make_resolver_func (const tree default_decl,
35100 const tree dispatch_decl,
35101 basic_block *empty_bb)
35103 char *resolver_name;
35104 tree decl, type, decl_name, t;
35105 bool is_uniq = false;
35107 /* IFUNCs have to be globally visible. So, if the default_decl is
35108 not, then the name of the IFUNC should be made unique. */
35109 if (TREE_PUBLIC (default_decl) == 0)
35110 is_uniq = true;
35112 /* Append the filename to the resolver function if the versions are
35113 not externally visible. This is because the resolver function has
35114 to be externally visible for the loader to find it. So, appending
35115 the filename will prevent conflicts with a resolver function from
35116 another module which is based on the same version name. */
35117 resolver_name = make_name (default_decl, "resolver", is_uniq);
35119 /* The resolver function should return a (void *). */
35120 type = build_function_type_list (ptr_type_node, NULL_TREE);
35122 decl = build_fn_decl (resolver_name, type);
35123 decl_name = get_identifier (resolver_name);
35124 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35126 DECL_NAME (decl) = decl_name;
35127 TREE_USED (decl) = 1;
35128 DECL_ARTIFICIAL (decl) = 1;
35129 DECL_IGNORED_P (decl) = 0;
35130 /* IFUNC resolvers have to be externally visible. */
35131 TREE_PUBLIC (decl) = 1;
35132 DECL_UNINLINABLE (decl) = 1;
35134 /* Resolver is not external, body is generated. */
35135 DECL_EXTERNAL (decl) = 0;
35136 DECL_EXTERNAL (dispatch_decl) = 0;
35138 DECL_CONTEXT (decl) = NULL_TREE;
35139 DECL_INITIAL (decl) = make_node (BLOCK);
35140 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35142 if (DECL_COMDAT_GROUP (default_decl)
35143 || TREE_PUBLIC (default_decl))
35145 /* In this case, each translation unit with a call to this
35146 versioned function will put out a resolver. Ensure it
35147 is comdat to keep just one copy. */
35148 DECL_COMDAT (decl) = 1;
35149 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35151 /* Build result decl and add to function_decl. */
35152 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35153 DECL_ARTIFICIAL (t) = 1;
35154 DECL_IGNORED_P (t) = 1;
35155 DECL_RESULT (decl) = t;
35157 gimplify_function_tree (decl);
35158 push_cfun (DECL_STRUCT_FUNCTION (decl));
35159 *empty_bb = init_lowered_empty_function (decl, false, 0);
35161 cgraph_node::add_new_function (decl, true);
35162 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35164 pop_cfun ();
35166 gcc_assert (dispatch_decl != NULL);
35167 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35168 DECL_ATTRIBUTES (dispatch_decl)
35169 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35171 /* Create the alias for dispatch to resolver here. */
35172 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35173 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35174 XDELETEVEC (resolver_name);
35175 return decl;
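/* The "ifunc" attribute attached just above is the same mechanism
   available in user code; the resolver/dispatcher pair built here is
   roughly equivalent to writing (a sketch, details differ):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       ... return the address of the best foo version ...
     }
     int foo (int) __attribute__ ((ifunc ("foo_resolver")));

   The loader calls the resolver once and binds foo to whatever function
   address it returns.  */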
35178 /* Generate the dispatching code body to dispatch multi-versioned function
35179 DECL. The target hook is called to process the "target" attributes and
35180 provide the code to dispatch the right function at run-time. NODE points
35181 to the dispatcher decl whose body will be created. */
35183 static tree
35184 ix86_generate_version_dispatcher_body (void *node_p)
35186 tree resolver_decl;
35187 basic_block empty_bb;
35188 tree default_ver_decl;
35189 struct cgraph_node *versn;
35190 struct cgraph_node *node;
35192 struct cgraph_function_version_info *node_version_info = NULL;
35193 struct cgraph_function_version_info *versn_info = NULL;
35195 node = (cgraph_node *)node_p;
35197 node_version_info = node->function_version ();
35198 gcc_assert (node->dispatcher_function
35199 && node_version_info != NULL);
35201 if (node_version_info->dispatcher_resolver)
35202 return node_version_info->dispatcher_resolver;
35204 /* The first version in the chain corresponds to the default version. */
35205 default_ver_decl = node_version_info->next->this_node->decl;
35207 /* node is going to be an alias, so remove the finalized bit. */
35208 node->definition = false;
35210 resolver_decl = make_resolver_func (default_ver_decl,
35211 node->decl, &empty_bb);
35213 node_version_info->dispatcher_resolver = resolver_decl;
35215 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35217 auto_vec<tree, 2> fn_ver_vec;
35219 for (versn_info = node_version_info->next; versn_info;
35220 versn_info = versn_info->next)
35222 versn = versn_info->this_node;
35223 /* Check for virtual functions here again, as by this time it should
35224 have been determined if this function needs a vtable index or
35225 not. This happens for methods in derived classes that override
35226 virtual methods in base classes but are not explicitly marked as
35227 virtual. */
35228 if (DECL_VINDEX (versn->decl))
35229 sorry ("Virtual function multiversioning not supported");
35231 fn_ver_vec.safe_push (versn->decl);
35234 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35235 cgraph_edge::rebuild_edges ();
35236 pop_cfun ();
35237 return resolver_decl;
35239 /* This builds the processor_model struct type defined in
35240 libgcc/config/i386/cpuinfo.c */
35242 static tree
35243 build_processor_model_struct (void)
35245 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35246 "__cpu_features"};
35247 tree field = NULL_TREE, field_chain = NULL_TREE;
35248 int i;
35249 tree type = make_node (RECORD_TYPE);
35251 /* The first 3 fields are unsigned int. */
35252 for (i = 0; i < 3; ++i)
35254 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35255 get_identifier (field_name[i]), unsigned_type_node);
35256 if (field_chain != NULL_TREE)
35257 DECL_CHAIN (field) = field_chain;
35258 field_chain = field;
35261 /* The last field is an array of unsigned integers of size one. */
35262 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35263 get_identifier (field_name[3]),
35264 build_array_type (unsigned_type_node,
35265 build_index_type (size_one_node)));
35266 if (field_chain != NULL_TREE)
35267 DECL_CHAIN (field) = field_chain;
35268 field_chain = field;
35270 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35271 return type;
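/* The layout must agree with the libgcc side; in C the record built here
   corresponds to (as declared in cpuinfo.c of the matching release):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   i.e. the three scalar fields in the order of field_name[] followed by
   the feature bit array.  */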
35274 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35276 static tree
35277 make_var_decl (tree type, const char *name)
35279 tree new_decl;
35281 new_decl = build_decl (UNKNOWN_LOCATION,
35282 VAR_DECL,
35283 get_identifier (name),
35284 type);
35286 DECL_EXTERNAL (new_decl) = 1;
35287 TREE_STATIC (new_decl) = 1;
35288 TREE_PUBLIC (new_decl) = 1;
35289 DECL_INITIAL (new_decl) = 0;
35290 DECL_ARTIFICIAL (new_decl) = 0;
35291 DECL_PRESERVE_P (new_decl) = 1;
35293 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35294 assemble_variable (new_decl, 0, 0, 0);
35296 return new_decl;
35299 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35300 into a check against the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c. */
35302 static tree
35303 fold_builtin_cpu (tree fndecl, tree *args)
35305 unsigned int i;
35306 enum ix86_builtins fn_code = (enum ix86_builtins)
35307 DECL_FUNCTION_CODE (fndecl);
35308 tree param_string_cst = NULL;
35310 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35311 enum processor_features
35313 F_CMOV = 0,
35314 F_MMX,
35315 F_POPCNT,
35316 F_SSE,
35317 F_SSE2,
35318 F_SSE3,
35319 F_SSSE3,
35320 F_SSE4_1,
35321 F_SSE4_2,
35322 F_AVX,
35323 F_AVX2,
35324 F_SSE4_A,
35325 F_FMA4,
35326 F_XOP,
35327 F_FMA,
35328 F_AVX512F,
35329 F_BMI,
35330 F_BMI2,
35331 F_MAX
35334 /* These are the values for vendor types and cpu types and subtypes
35335 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35336 the corresponding start value. */
35337 enum processor_model
35339 M_INTEL = 1,
35340 M_AMD,
35341 M_CPU_TYPE_START,
35342 M_INTEL_BONNELL,
35343 M_INTEL_CORE2,
35344 M_INTEL_COREI7,
35345 M_AMDFAM10H,
35346 M_AMDFAM15H,
35347 M_INTEL_SILVERMONT,
35348 M_INTEL_KNL,
35349 M_AMD_BTVER1,
35350 M_AMD_BTVER2,
35351 M_CPU_SUBTYPE_START,
35352 M_INTEL_COREI7_NEHALEM,
35353 M_INTEL_COREI7_WESTMERE,
35354 M_INTEL_COREI7_SANDYBRIDGE,
35355 M_AMDFAM10H_BARCELONA,
35356 M_AMDFAM10H_SHANGHAI,
35357 M_AMDFAM10H_ISTANBUL,
35358 M_AMDFAM15H_BDVER1,
35359 M_AMDFAM15H_BDVER2,
35360 M_AMDFAM15H_BDVER3,
35361 M_AMDFAM15H_BDVER4,
35362 M_INTEL_COREI7_IVYBRIDGE,
35363 M_INTEL_COREI7_HASWELL,
35364 M_INTEL_COREI7_BROADWELL
35367 static struct _arch_names_table
35369 const char *const name;
35370 const enum processor_model model;
35372 const arch_names_table[] =
35374 {"amd", M_AMD},
35375 {"intel", M_INTEL},
35376 {"atom", M_INTEL_BONNELL},
35377 {"slm", M_INTEL_SILVERMONT},
35378 {"core2", M_INTEL_CORE2},
35379 {"corei7", M_INTEL_COREI7},
35380 {"nehalem", M_INTEL_COREI7_NEHALEM},
35381 {"westmere", M_INTEL_COREI7_WESTMERE},
35382 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35383 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35384 {"haswell", M_INTEL_COREI7_HASWELL},
35385 {"broadwell", M_INTEL_COREI7_BROADWELL},
35386 {"bonnell", M_INTEL_BONNELL},
35387 {"silvermont", M_INTEL_SILVERMONT},
35388 {"knl", M_INTEL_KNL},
35389 {"amdfam10h", M_AMDFAM10H},
35390 {"barcelona", M_AMDFAM10H_BARCELONA},
35391 {"shanghai", M_AMDFAM10H_SHANGHAI},
35392 {"istanbul", M_AMDFAM10H_ISTANBUL},
35393 {"btver1", M_AMD_BTVER1},
35394 {"amdfam15h", M_AMDFAM15H},
35395 {"bdver1", M_AMDFAM15H_BDVER1},
35396 {"bdver2", M_AMDFAM15H_BDVER2},
35397 {"bdver3", M_AMDFAM15H_BDVER3},
35398 {"bdver4", M_AMDFAM15H_BDVER4},
35399 {"btver2", M_AMD_BTVER2},
35402 static struct _isa_names_table
35404 const char *const name;
35405 const enum processor_features feature;
35407 const isa_names_table[] =
35409 {"cmov", F_CMOV},
35410 {"mmx", F_MMX},
35411 {"popcnt", F_POPCNT},
35412 {"sse", F_SSE},
35413 {"sse2", F_SSE2},
35414 {"sse3", F_SSE3},
35415 {"ssse3", F_SSSE3},
35416 {"sse4a", F_SSE4_A},
35417 {"sse4.1", F_SSE4_1},
35418 {"sse4.2", F_SSE4_2},
35419 {"avx", F_AVX},
35420 {"fma4", F_FMA4},
35421 {"xop", F_XOP},
35422 {"fma", F_FMA},
35423 {"avx2", F_AVX2},
35424 {"avx512f",F_AVX512F},
35425 {"bmi", F_BMI},
35426 {"bmi2", F_BMI2}
35429 tree __processor_model_type = build_processor_model_struct ();
35430 tree __cpu_model_var = make_var_decl (__processor_model_type,
35431 "__cpu_model");
35434 varpool_node::add (__cpu_model_var);
35436 gcc_assert ((args != NULL) && (*args != NULL));
35438 param_string_cst = *args;
35439 while (param_string_cst
35440 && TREE_CODE (param_string_cst) != STRING_CST)
35442 /* *args must be an expr that can contain other EXPRs leading to a
35443 STRING_CST. */
35444 if (!EXPR_P (param_string_cst))
35446 error ("Parameter to builtin must be a string constant or literal");
35447 return integer_zero_node;
35449 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35452 gcc_assert (param_string_cst);
35454 if (fn_code == IX86_BUILTIN_CPU_IS)
35456 tree ref;
35457 tree field;
35458 tree final;
35460 unsigned int field_val = 0;
35461 unsigned int NUM_ARCH_NAMES
35462 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35464 for (i = 0; i < NUM_ARCH_NAMES; i++)
35465 if (strcmp (arch_names_table[i].name,
35466 TREE_STRING_POINTER (param_string_cst)) == 0)
35467 break;
35469 if (i == NUM_ARCH_NAMES)
35471 error ("Parameter to builtin not valid: %s",
35472 TREE_STRING_POINTER (param_string_cst));
35473 return integer_zero_node;
35476 field = TYPE_FIELDS (__processor_model_type);
35477 field_val = arch_names_table[i].model;
35479 /* CPU types are stored in the next field. */
35480 if (field_val > M_CPU_TYPE_START
35481 && field_val < M_CPU_SUBTYPE_START)
35483 field = DECL_CHAIN (field);
35484 field_val -= M_CPU_TYPE_START;
35487 /* CPU subtypes are stored in the next field. */
35488 if (field_val > M_CPU_SUBTYPE_START)
35490 field = DECL_CHAIN (DECL_CHAIN (field));
35491 field_val -= M_CPU_SUBTYPE_START;
35494 /* Get the appropriate field in __cpu_model. */
35495 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35496 field, NULL_TREE);
35498 /* Check the value. */
35499 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35500 build_int_cstu (unsigned_type_node, field_val));
35501 return build1 (CONVERT_EXPR, integer_type_node, final);
35503 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35505 tree ref;
35506 tree array_elt;
35507 tree field;
35508 tree final;
35510 unsigned int field_val = 0;
35511 unsigned int NUM_ISA_NAMES
35512 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35514 for (i = 0; i < NUM_ISA_NAMES; i++)
35515 if (strcmp (isa_names_table[i].name,
35516 TREE_STRING_POINTER (param_string_cst)) == 0)
35517 break;
35519 if (i == NUM_ISA_NAMES)
35521 error ("Parameter to builtin not valid: %s",
35522 TREE_STRING_POINTER (param_string_cst));
35523 return integer_zero_node;
35526 field = TYPE_FIELDS (__processor_model_type);
35527 /* Get the last field, which is __cpu_features. */
35528 while (DECL_CHAIN (field))
35529 field = DECL_CHAIN (field);
35531 /* Get the appropriate field: __cpu_model.__cpu_features */
35532 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35533 field, NULL_TREE);
35535 /* Access the 0th element of __cpu_features array. */
35536 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35537 integer_zero_node, NULL_TREE, NULL_TREE);
35539 field_val = (1 << isa_names_table[i].feature);
35540 /* Return __cpu_model.__cpu_features[0] & field_val */
35541 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35542 build_int_cstu (unsigned_type_node, field_val));
35543 return build1 (CONVERT_EXPR, integer_type_node, final);
35545 gcc_unreachable ();
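/* Two folded examples, as trees (sketches of the result built above):

     __builtin_cpu_is ("westmere")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   Vendor and cpu names select one of the first three fields; feature
   names test a single bit of __cpu_features[0].  */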
35548 static tree
35549 ix86_fold_builtin (tree fndecl, int n_args,
35550 tree *args, bool ignore ATTRIBUTE_UNUSED)
35552 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35554 enum ix86_builtins fn_code = (enum ix86_builtins)
35555 DECL_FUNCTION_CODE (fndecl);
35556 if (fn_code == IX86_BUILTIN_CPU_IS
35557 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35559 gcc_assert (n_args == 1);
35560 return fold_builtin_cpu (fndecl, args);
35564 #ifdef SUBTARGET_FOLD_BUILTIN
35565 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35566 #endif
35568 return NULL_TREE;
35571 /* Make builtins to detect cpu type and features supported. NAME is
35572 the builtin name, CODE is the builtin code, and FTYPE is the function
35573 type of the builtin. */
35575 static void
35576 make_cpu_type_builtin (const char* name, int code,
35577 enum ix86_builtin_func_type ftype, bool is_const)
35579 tree decl;
35580 tree type;
35582 type = ix86_get_builtin_func_type (ftype);
35583 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35584 NULL, NULL_TREE);
35585 gcc_assert (decl != NULL_TREE);
35586 ix86_builtins[(int) code] = decl;
35587 TREE_READONLY (decl) = is_const;
35590 /* Make builtins to get CPU type and features supported. The created
35591 builtins are:
35593 __builtin_cpu_init (), to detect cpu type and features,
35594 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35595 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35598 static void
35599 ix86_init_platform_type_builtins (void)
35601 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35602 INT_FTYPE_VOID, false);
35603 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35604 INT_FTYPE_PCCHAR, true);
35605 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35606 INT_FTYPE_PCCHAR, true);
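/* Typical user-level use of the three builtins created here (a sketch;
   do_corei7 and do_sse42 are placeholder functions):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7"))
       do_corei7 ();
     else if (__builtin_cpu_supports ("sse4.2"))
       do_sse42 ();

   Only __builtin_cpu_is and __builtin_cpu_supports are marked
   TREE_READONLY; __builtin_cpu_init is not, since it fills in
   __cpu_model.  */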
35609 /* Internal method for ix86_init_builtins. */
35611 static void
35612 ix86_init_builtins_va_builtins_abi (void)
35614 tree ms_va_ref, sysv_va_ref;
35615 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35616 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35617 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35618 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35620 if (!TARGET_64BIT)
35621 return;
35622 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35623 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35624 ms_va_ref = build_reference_type (ms_va_list_type_node);
35625 sysv_va_ref =
35626 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35628 fnvoid_va_end_ms =
35629 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35630 fnvoid_va_start_ms =
35631 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35632 fnvoid_va_end_sysv =
35633 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35634 fnvoid_va_start_sysv =
35635 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35636 NULL_TREE);
35637 fnvoid_va_copy_ms =
35638 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35639 NULL_TREE);
35640 fnvoid_va_copy_sysv =
35641 build_function_type_list (void_type_node, sysv_va_ref,
35642 sysv_va_ref, NULL_TREE);
35644 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35645 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35646 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35647 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35648 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35649 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35650 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35651 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35652 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35653 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35654 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35655 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
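/* A sketch of how the ms_abi variants are meant to be used on a 64-bit
   target (illustrative only):

     void __attribute__ ((ms_abi)) f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ... consume arguments with __builtin_va_arg (ap, type) ...
       __builtin_ms_va_end (ap);
     }

   The __builtin_sysv_* set mirrors this for explicitly sysv_abi
   functions called from ms_abi code.  */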
35658 static void
35659 ix86_init_builtin_types (void)
35661 tree float128_type_node, float80_type_node;
35663 /* The __float80 type. */
35664 float80_type_node = long_double_type_node;
35665 if (TYPE_MODE (float80_type_node) != XFmode)
35667 /* The __float80 type. */
35668 float80_type_node = make_node (REAL_TYPE);
35670 TYPE_PRECISION (float80_type_node) = 80;
35671 layout_type (float80_type_node);
35673 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35675 /* The __float128 type. */
35676 float128_type_node = make_node (REAL_TYPE);
35677 TYPE_PRECISION (float128_type_node) = 128;
35678 layout_type (float128_type_node);
35679 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35681 /* This macro is built by i386-builtin-types.awk. */
35682 DEFINE_BUILTIN_PRIMITIVE_TYPES;
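/* Both types registered above are directly usable from C on x86, e.g.
   (a small sketch; the 'w' and 'q' literal suffixes select the matching
   constant types):

     __float80  x = 1.0w;
     __float128 y = 1.0q;

   __float80 is XFmode (the same as long double where that is 80-bit),
   while __float128 is always the 128-bit TFmode type.  */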
35685 static void
35686 ix86_init_builtins (void)
35688 tree t;
35690 ix86_init_builtin_types ();
35692 /* Builtins to get CPU type and features. */
35693 ix86_init_platform_type_builtins ();
35695 /* TFmode support builtins. */
35696 def_builtin_const (0, "__builtin_infq",
35697 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35698 def_builtin_const (0, "__builtin_huge_valq",
35699 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35701 /* We will expand them to normal call if SSE isn't available since
35702 they are used by libgcc. */
35703 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35704 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35705 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35706 TREE_READONLY (t) = 1;
35707 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35709 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35710 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35711 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35712 TREE_READONLY (t) = 1;
35713 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35715 ix86_init_tm_builtins ();
35716 ix86_init_mmx_sse_builtins ();
35717 ix86_init_mpx_builtins ();
35719 if (TARGET_LP64)
35720 ix86_init_builtins_va_builtins_abi ();
35722 #ifdef SUBTARGET_INIT_BUILTINS
35723 SUBTARGET_INIT_BUILTINS;
35724 #endif
35727 /* Return the ix86 builtin for CODE. */
35729 static tree
35730 ix86_builtin_decl (unsigned code, bool)
35732 if (code >= IX86_BUILTIN_MAX)
35733 return error_mark_node;
35735 return ix86_builtins[code];
35738 /* Errors in the source file can cause expand_expr to return const0_rtx
35739 where we expect a vector. To avoid crashing, use one of the vector
35740 clear instructions. */
35741 static rtx
35742 safe_vector_operand (rtx x, machine_mode mode)
35744 if (x == const0_rtx)
35745 x = CONST0_RTX (mode);
35746 return x;
35749 /* Fixup modeless constants to fit required mode. */
35750 static rtx
35751 fixup_modeless_constant (rtx x, machine_mode mode)
35753 if (GET_MODE (x) == VOIDmode)
35754 x = convert_to_mode (mode, x, 1);
35755 return x;
35758 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35760 static rtx
35761 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35763 rtx pat;
35764 tree arg0 = CALL_EXPR_ARG (exp, 0);
35765 tree arg1 = CALL_EXPR_ARG (exp, 1);
35766 rtx op0 = expand_normal (arg0);
35767 rtx op1 = expand_normal (arg1);
35768 machine_mode tmode = insn_data[icode].operand[0].mode;
35769 machine_mode mode0 = insn_data[icode].operand[1].mode;
35770 machine_mode mode1 = insn_data[icode].operand[2].mode;
35772 if (VECTOR_MODE_P (mode0))
35773 op0 = safe_vector_operand (op0, mode0);
35774 if (VECTOR_MODE_P (mode1))
35775 op1 = safe_vector_operand (op1, mode1);
35777 if (optimize || !target
35778 || GET_MODE (target) != tmode
35779 || !insn_data[icode].operand[0].predicate (target, tmode))
35780 target = gen_reg_rtx (tmode);
35782 if (GET_MODE (op1) == SImode && mode1 == TImode)
35784 rtx x = gen_reg_rtx (V4SImode);
35785 emit_insn (gen_sse2_loadd (x, op1));
35786 op1 = gen_lowpart (TImode, x);
35789 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35790 op0 = copy_to_mode_reg (mode0, op0);
35791 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35792 op1 = copy_to_mode_reg (mode1, op1);
35794 pat = GEN_FCN (icode) (target, op0, op1);
35795 if (! pat)
35796 return 0;
35798 emit_insn (pat);
35800 return target;
35803 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35805 static rtx
35806 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35807 enum ix86_builtin_func_type m_type,
35808 enum rtx_code sub_code)
35810 rtx pat;
35811 int i;
35812 int nargs;
35813 bool comparison_p = false;
35814 bool tf_p = false;
35815 bool last_arg_constant = false;
35816 int num_memory = 0;
35817 struct {
35818 rtx op;
35819 machine_mode mode;
35820 } args[4];
35822 machine_mode tmode = insn_data[icode].operand[0].mode;
35824 switch (m_type)
35826 case MULTI_ARG_4_DF2_DI_I:
35827 case MULTI_ARG_4_DF2_DI_I1:
35828 case MULTI_ARG_4_SF2_SI_I:
35829 case MULTI_ARG_4_SF2_SI_I1:
35830 nargs = 4;
35831 last_arg_constant = true;
35832 break;
35834 case MULTI_ARG_3_SF:
35835 case MULTI_ARG_3_DF:
35836 case MULTI_ARG_3_SF2:
35837 case MULTI_ARG_3_DF2:
35838 case MULTI_ARG_3_DI:
35839 case MULTI_ARG_3_SI:
35840 case MULTI_ARG_3_SI_DI:
35841 case MULTI_ARG_3_HI:
35842 case MULTI_ARG_3_HI_SI:
35843 case MULTI_ARG_3_QI:
35844 case MULTI_ARG_3_DI2:
35845 case MULTI_ARG_3_SI2:
35846 case MULTI_ARG_3_HI2:
35847 case MULTI_ARG_3_QI2:
35848 nargs = 3;
35849 break;
35851 case MULTI_ARG_2_SF:
35852 case MULTI_ARG_2_DF:
35853 case MULTI_ARG_2_DI:
35854 case MULTI_ARG_2_SI:
35855 case MULTI_ARG_2_HI:
35856 case MULTI_ARG_2_QI:
35857 nargs = 2;
35858 break;
35860 case MULTI_ARG_2_DI_IMM:
35861 case MULTI_ARG_2_SI_IMM:
35862 case MULTI_ARG_2_HI_IMM:
35863 case MULTI_ARG_2_QI_IMM:
35864 nargs = 2;
35865 last_arg_constant = true;
35866 break;
35868 case MULTI_ARG_1_SF:
35869 case MULTI_ARG_1_DF:
35870 case MULTI_ARG_1_SF2:
35871 case MULTI_ARG_1_DF2:
35872 case MULTI_ARG_1_DI:
35873 case MULTI_ARG_1_SI:
35874 case MULTI_ARG_1_HI:
35875 case MULTI_ARG_1_QI:
35876 case MULTI_ARG_1_SI_DI:
35877 case MULTI_ARG_1_HI_DI:
35878 case MULTI_ARG_1_HI_SI:
35879 case MULTI_ARG_1_QI_DI:
35880 case MULTI_ARG_1_QI_SI:
35881 case MULTI_ARG_1_QI_HI:
35882 nargs = 1;
35883 break;
35885 case MULTI_ARG_2_DI_CMP:
35886 case MULTI_ARG_2_SI_CMP:
35887 case MULTI_ARG_2_HI_CMP:
35888 case MULTI_ARG_2_QI_CMP:
35889 nargs = 2;
35890 comparison_p = true;
35891 break;
35893 case MULTI_ARG_2_SF_TF:
35894 case MULTI_ARG_2_DF_TF:
35895 case MULTI_ARG_2_DI_TF:
35896 case MULTI_ARG_2_SI_TF:
35897 case MULTI_ARG_2_HI_TF:
35898 case MULTI_ARG_2_QI_TF:
35899 nargs = 2;
35900 tf_p = true;
35901 break;
35903 default:
35904 gcc_unreachable ();
35907 if (optimize || !target
35908 || GET_MODE (target) != tmode
35909 || !insn_data[icode].operand[0].predicate (target, tmode))
35910 target = gen_reg_rtx (tmode);
35912 gcc_assert (nargs <= 4);
35914 for (i = 0; i < nargs; i++)
35916 tree arg = CALL_EXPR_ARG (exp, i);
35917 rtx op = expand_normal (arg);
35918 int adjust = (comparison_p) ? 1 : 0;
35919 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35921 if (last_arg_constant && i == nargs - 1)
35923 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35925 enum insn_code new_icode = icode;
35926 switch (icode)
35928 case CODE_FOR_xop_vpermil2v2df3:
35929 case CODE_FOR_xop_vpermil2v4sf3:
35930 case CODE_FOR_xop_vpermil2v4df3:
35931 case CODE_FOR_xop_vpermil2v8sf3:
35932 error ("the last argument must be a 2-bit immediate");
35933 return gen_reg_rtx (tmode);
35934 case CODE_FOR_xop_rotlv2di3:
35935 new_icode = CODE_FOR_rotlv2di3;
35936 goto xop_rotl;
35937 case CODE_FOR_xop_rotlv4si3:
35938 new_icode = CODE_FOR_rotlv4si3;
35939 goto xop_rotl;
35940 case CODE_FOR_xop_rotlv8hi3:
35941 new_icode = CODE_FOR_rotlv8hi3;
35942 goto xop_rotl;
35943 case CODE_FOR_xop_rotlv16qi3:
35944 new_icode = CODE_FOR_rotlv16qi3;
35945 xop_rotl:
35946 if (CONST_INT_P (op))
35948 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35949 op = GEN_INT (INTVAL (op) & mask);
35950 gcc_checking_assert
35951 (insn_data[icode].operand[i + 1].predicate (op, mode));
35953 else
35955 gcc_checking_assert
35956 (nargs == 2
35957 && insn_data[new_icode].operand[0].mode == tmode
35958 && insn_data[new_icode].operand[1].mode == tmode
35959 && insn_data[new_icode].operand[2].mode == mode
35960 && insn_data[new_icode].operand[0].predicate
35961 == insn_data[icode].operand[0].predicate
35962 && insn_data[new_icode].operand[1].predicate
35963 == insn_data[icode].operand[1].predicate);
35964 icode = new_icode;
35965 goto non_constant;
35967 break;
35968 default:
35969 gcc_unreachable ();
35973 else
35975 non_constant:
35976 if (VECTOR_MODE_P (mode))
35977 op = safe_vector_operand (op, mode);
35979 /* If we aren't optimizing, only allow one memory operand to be
35980 generated. */
35981 if (memory_operand (op, mode))
35982 num_memory++;
35984 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35986 if (optimize
35987 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35988 || num_memory > 1)
35989 op = force_reg (mode, op);
35992 args[i].op = op;
35993 args[i].mode = mode;
35996 switch (nargs)
35998 case 1:
35999 pat = GEN_FCN (icode) (target, args[0].op);
36000 break;
36002 case 2:
36003 if (tf_p)
36004 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36005 GEN_INT ((int)sub_code));
36006 else if (! comparison_p)
36007 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36008 else
36010 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36011 args[0].op,
36012 args[1].op);
36014 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36016 break;
36018 case 3:
36019 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36020 break;
36022 case 4:
36023 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36024 break;
36026 default:
36027 gcc_unreachable ();
36030 if (! pat)
36031 return 0;
36033 emit_insn (pat);
36034 return target;
36037 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36038 insns with vec_merge. */
36040 static rtx
36041 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36042 rtx target)
36044 rtx pat;
36045 tree arg0 = CALL_EXPR_ARG (exp, 0);
36046 rtx op1, op0 = expand_normal (arg0);
36047 machine_mode tmode = insn_data[icode].operand[0].mode;
36048 machine_mode mode0 = insn_data[icode].operand[1].mode;
36050 if (optimize || !target
36051 || GET_MODE (target) != tmode
36052 || !insn_data[icode].operand[0].predicate (target, tmode))
36053 target = gen_reg_rtx (tmode);
36055 if (VECTOR_MODE_P (mode0))
36056 op0 = safe_vector_operand (op0, mode0);
36058 if ((optimize && !register_operand (op0, mode0))
36059 || !insn_data[icode].operand[1].predicate (op0, mode0))
36060 op0 = copy_to_mode_reg (mode0, op0);
36062 op1 = op0;
36063 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36064 op1 = copy_to_mode_reg (mode0, op1);
36066 pat = GEN_FCN (icode) (target, op0, op1);
36067 if (! pat)
36068 return 0;
36069 emit_insn (pat);
36070 return target;
36073 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36075 static rtx
36076 ix86_expand_sse_compare (const struct builtin_description *d,
36077 tree exp, rtx target, bool swap)
36079 rtx pat;
36080 tree arg0 = CALL_EXPR_ARG (exp, 0);
36081 tree arg1 = CALL_EXPR_ARG (exp, 1);
36082 rtx op0 = expand_normal (arg0);
36083 rtx op1 = expand_normal (arg1);
36084 rtx op2;
36085 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36086 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36087 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36088 enum rtx_code comparison = d->comparison;
36090 if (VECTOR_MODE_P (mode0))
36091 op0 = safe_vector_operand (op0, mode0);
36092 if (VECTOR_MODE_P (mode1))
36093 op1 = safe_vector_operand (op1, mode1);
36095 /* Swap operands if we have a comparison that isn't available in
36096 hardware. */
36097 if (swap)
36098 std::swap (op0, op1);
36100 if (optimize || !target
36101 || GET_MODE (target) != tmode
36102 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36103 target = gen_reg_rtx (tmode);
36105 if ((optimize && !register_operand (op0, mode0))
36106 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36107 op0 = copy_to_mode_reg (mode0, op0);
36108 if ((optimize && !register_operand (op1, mode1))
36109 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36110 op1 = copy_to_mode_reg (mode1, op1);
36112 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36113 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36114 if (! pat)
36115 return 0;
36116 emit_insn (pat);
36117 return target;
36120 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36122 static rtx
36123 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36124 rtx target)
36126 rtx pat;
36127 tree arg0 = CALL_EXPR_ARG (exp, 0);
36128 tree arg1 = CALL_EXPR_ARG (exp, 1);
36129 rtx op0 = expand_normal (arg0);
36130 rtx op1 = expand_normal (arg1);
36131 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36132 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36133 enum rtx_code comparison = d->comparison;
36135 if (VECTOR_MODE_P (mode0))
36136 op0 = safe_vector_operand (op0, mode0);
36137 if (VECTOR_MODE_P (mode1))
36138 op1 = safe_vector_operand (op1, mode1);
36140 /* Swap operands if we have a comparison that isn't available in
36141 hardware. */
36142 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36143 std::swap (op0, op1);
36145 target = gen_reg_rtx (SImode);
36146 emit_move_insn (target, const0_rtx);
36147 target = gen_rtx_SUBREG (QImode, target, 0);
36149 if ((optimize && !register_operand (op0, mode0))
36150 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36151 op0 = copy_to_mode_reg (mode0, op0);
36152 if ((optimize && !register_operand (op1, mode1))
36153 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36154 op1 = copy_to_mode_reg (mode1, op1);
36156 pat = GEN_FCN (d->icode) (op0, op1);
36157 if (! pat)
36158 return 0;
36159 emit_insn (pat);
36160 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36161 gen_rtx_fmt_ee (comparison, QImode,
36162 SET_DEST (pat),
36163 const0_rtx)));
36165 return SUBREG_REG (target);
36168 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36170 static rtx
36171 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36172 rtx target)
36174 rtx pat;
36175 tree arg0 = CALL_EXPR_ARG (exp, 0);
36176 rtx op1, op0 = expand_normal (arg0);
36177 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36178 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36180 if (optimize || target == 0
36181 || GET_MODE (target) != tmode
36182 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36183 target = gen_reg_rtx (tmode);
36185 if (VECTOR_MODE_P (mode0))
36186 op0 = safe_vector_operand (op0, mode0);
36188 if ((optimize && !register_operand (op0, mode0))
36189 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36190 op0 = copy_to_mode_reg (mode0, op0);
36192 op1 = GEN_INT (d->comparison);
36194 pat = GEN_FCN (d->icode) (target, op0, op1);
36195 if (! pat)
36196 return 0;
36197 emit_insn (pat);
36198 return target;
36201 static rtx
36202 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36203 tree exp, rtx target)
36205 rtx pat;
36206 tree arg0 = CALL_EXPR_ARG (exp, 0);
36207 tree arg1 = CALL_EXPR_ARG (exp, 1);
36208 rtx op0 = expand_normal (arg0);
36209 rtx op1 = expand_normal (arg1);
36210 rtx op2;
36211 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36212 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36213 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36215 if (optimize || target == 0
36216 || GET_MODE (target) != tmode
36217 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36218 target = gen_reg_rtx (tmode);
36220 op0 = safe_vector_operand (op0, mode0);
36221 op1 = safe_vector_operand (op1, mode1);
36223 if ((optimize && !register_operand (op0, mode0))
36224 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36225 op0 = copy_to_mode_reg (mode0, op0);
36226 if ((optimize && !register_operand (op1, mode1))
36227 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36228 op1 = copy_to_mode_reg (mode1, op1);
36230 op2 = GEN_INT (d->comparison);
36232 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36233 if (! pat)
36234 return 0;
36235 emit_insn (pat);
36236 return target;
36239 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36241 static rtx
36242 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36243 rtx target)
36245 rtx pat;
36246 tree arg0 = CALL_EXPR_ARG (exp, 0);
36247 tree arg1 = CALL_EXPR_ARG (exp, 1);
36248 rtx op0 = expand_normal (arg0);
36249 rtx op1 = expand_normal (arg1);
36250 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36251 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36252 enum rtx_code comparison = d->comparison;
36254 if (VECTOR_MODE_P (mode0))
36255 op0 = safe_vector_operand (op0, mode0);
36256 if (VECTOR_MODE_P (mode1))
36257 op1 = safe_vector_operand (op1, mode1);
36259 target = gen_reg_rtx (SImode);
36260 emit_move_insn (target, const0_rtx);
36261 target = gen_rtx_SUBREG (QImode, target, 0);
36263 if ((optimize && !register_operand (op0, mode0))
36264 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36265 op0 = copy_to_mode_reg (mode0, op0);
36266 if ((optimize && !register_operand (op1, mode1))
36267 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36268 op1 = copy_to_mode_reg (mode1, op1);
36270 pat = GEN_FCN (d->icode) (op0, op1);
36271 if (! pat)
36272 return 0;
36273 emit_insn (pat);
36274 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36275 gen_rtx_fmt_ee (comparison, QImode,
36276 SET_DEST (pat),
36277 const0_rtx)));
36279 return SUBREG_REG (target);
36282 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36284 static rtx
36285 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36286 tree exp, rtx target)
36288 rtx pat;
36289 tree arg0 = CALL_EXPR_ARG (exp, 0);
36290 tree arg1 = CALL_EXPR_ARG (exp, 1);
36291 tree arg2 = CALL_EXPR_ARG (exp, 2);
36292 tree arg3 = CALL_EXPR_ARG (exp, 3);
36293 tree arg4 = CALL_EXPR_ARG (exp, 4);
36294 rtx scratch0, scratch1;
36295 rtx op0 = expand_normal (arg0);
36296 rtx op1 = expand_normal (arg1);
36297 rtx op2 = expand_normal (arg2);
36298 rtx op3 = expand_normal (arg3);
36299 rtx op4 = expand_normal (arg4);
36300 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36302 tmode0 = insn_data[d->icode].operand[0].mode;
36303 tmode1 = insn_data[d->icode].operand[1].mode;
36304 modev2 = insn_data[d->icode].operand[2].mode;
36305 modei3 = insn_data[d->icode].operand[3].mode;
36306 modev4 = insn_data[d->icode].operand[4].mode;
36307 modei5 = insn_data[d->icode].operand[5].mode;
36308 modeimm = insn_data[d->icode].operand[6].mode;
36310 if (VECTOR_MODE_P (modev2))
36311 op0 = safe_vector_operand (op0, modev2);
36312 if (VECTOR_MODE_P (modev4))
36313 op2 = safe_vector_operand (op2, modev4);
36315 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36316 op0 = copy_to_mode_reg (modev2, op0);
36317 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36318 op1 = copy_to_mode_reg (modei3, op1);
36319 if ((optimize && !register_operand (op2, modev4))
36320 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36321 op2 = copy_to_mode_reg (modev4, op2);
36322 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36323 op3 = copy_to_mode_reg (modei5, op3);
36325 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36327 error ("the fifth argument must be an 8-bit immediate");
36328 return const0_rtx;
36331 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36333 if (optimize || !target
36334 || GET_MODE (target) != tmode0
36335 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36336 target = gen_reg_rtx (tmode0);
36338 scratch1 = gen_reg_rtx (tmode1);
36340 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36342 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36344 if (optimize || !target
36345 || GET_MODE (target) != tmode1
36346 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36347 target = gen_reg_rtx (tmode1);
36349 scratch0 = gen_reg_rtx (tmode0);
36351 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36353 else
36355 gcc_assert (d->flag);
36357 scratch0 = gen_reg_rtx (tmode0);
36358 scratch1 = gen_reg_rtx (tmode1);
36360 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36363 if (! pat)
36364 return 0;
36366 emit_insn (pat);
36368 if (d->flag)
36370 target = gen_reg_rtx (SImode);
36371 emit_move_insn (target, const0_rtx);
36372 target = gen_rtx_SUBREG (QImode, target, 0);
36374 emit_insn
36375 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36376 gen_rtx_fmt_ee (EQ, QImode,
36377 gen_rtx_REG ((machine_mode) d->flag,
36378 FLAGS_REG),
36379 const0_rtx)));
36380 return SUBREG_REG (target);
36382 else
36383 return target;
36387 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
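/* Note (informal): same scheme as the pcmpestr expander above but without
   the explicit length operands -- operands 0/1 are the index and mask
   results, operands 2/3 the source vectors, operand 4 the control
   immediate.  */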
36389 static rtx
36390 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36391 tree exp, rtx target)
36393 rtx pat;
36394 tree arg0 = CALL_EXPR_ARG (exp, 0);
36395 tree arg1 = CALL_EXPR_ARG (exp, 1);
36396 tree arg2 = CALL_EXPR_ARG (exp, 2);
36397 rtx scratch0, scratch1;
36398 rtx op0 = expand_normal (arg0);
36399 rtx op1 = expand_normal (arg1);
36400 rtx op2 = expand_normal (arg2);
36401 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36403 tmode0 = insn_data[d->icode].operand[0].mode;
36404 tmode1 = insn_data[d->icode].operand[1].mode;
36405 modev2 = insn_data[d->icode].operand[2].mode;
36406 modev3 = insn_data[d->icode].operand[3].mode;
36407 modeimm = insn_data[d->icode].operand[4].mode;
36409 if (VECTOR_MODE_P (modev2))
36410 op0 = safe_vector_operand (op0, modev2);
36411 if (VECTOR_MODE_P (modev3))
36412 op1 = safe_vector_operand (op1, modev3);
36414 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36415 op0 = copy_to_mode_reg (modev2, op0);
36416 if ((optimize && !register_operand (op1, modev3))
36417 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36418 op1 = copy_to_mode_reg (modev3, op1);
36420 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36422 error ("the third argument must be an 8-bit immediate");
36423 return const0_rtx;
36426 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36428 if (optimize || !target
36429 || GET_MODE (target) != tmode0
36430 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36431 target = gen_reg_rtx (tmode0);
36433 scratch1 = gen_reg_rtx (tmode1);
36435 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36437 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36439 if (optimize || !target
36440 || GET_MODE (target) != tmode1
36441 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36442 target = gen_reg_rtx (tmode1);
36444 scratch0 = gen_reg_rtx (tmode0);
36446 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36448 else
36450 gcc_assert (d->flag);
36452 scratch0 = gen_reg_rtx (tmode0);
36453 scratch1 = gen_reg_rtx (tmode1);
36455 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36458 if (! pat)
36459 return 0;
36461 emit_insn (pat);
36463 if (d->flag)
36465 target = gen_reg_rtx (SImode);
36466 emit_move_insn (target, const0_rtx);
36467 target = gen_rtx_SUBREG (QImode, target, 0);
36469 emit_insn
36470 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36471 gen_rtx_fmt_ee (EQ, QImode,
36472 gen_rtx_REG ((machine_mode) d->flag,
36473 FLAGS_REG),
36474 const0_rtx)));
36475 return SUBREG_REG (target);
36477 else
36478 return target;
36481 /* Subroutine of ix86_expand_builtin to take care of insns with
36482 variable number of operands. */
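/* Bookkeeping used by the switch below (informal summary): NARGS is the
   number of call arguments, NARGS_CONSTANT how many trailing arguments
   must be immediates, MASK_POS adjusts that check for masked variants,
   LAST_ARG_COUNT marks shift counts that may also be passed in a
   register, and RMODE, when it differs from the destination mode, makes
   the result be returned as a subreg of the pattern's real destination.  */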
36484 static rtx
36485 ix86_expand_args_builtin (const struct builtin_description *d,
36486 tree exp, rtx target)
36488 rtx pat, real_target;
36489 unsigned int i, nargs;
36490 unsigned int nargs_constant = 0;
36491 unsigned int mask_pos = 0;
36492 int num_memory = 0;
36493 struct
36495 rtx op;
36496 machine_mode mode;
36497 } args[6];
36498 bool last_arg_count = false;
36499 enum insn_code icode = d->icode;
36500 const struct insn_data_d *insn_p = &insn_data[icode];
36501 machine_mode tmode = insn_p->operand[0].mode;
36502 machine_mode rmode = VOIDmode;
36503 bool swap = false;
36504 enum rtx_code comparison = d->comparison;
36506 switch ((enum ix86_builtin_func_type) d->flag)
36508 case V2DF_FTYPE_V2DF_ROUND:
36509 case V4DF_FTYPE_V4DF_ROUND:
36510 case V4SF_FTYPE_V4SF_ROUND:
36511 case V8SF_FTYPE_V8SF_ROUND:
36512 case V4SI_FTYPE_V4SF_ROUND:
36513 case V8SI_FTYPE_V8SF_ROUND:
36514 return ix86_expand_sse_round (d, exp, target);
36515 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36516 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36517 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36518 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36519 case INT_FTYPE_V8SF_V8SF_PTEST:
36520 case INT_FTYPE_V4DI_V4DI_PTEST:
36521 case INT_FTYPE_V4DF_V4DF_PTEST:
36522 case INT_FTYPE_V4SF_V4SF_PTEST:
36523 case INT_FTYPE_V2DI_V2DI_PTEST:
36524 case INT_FTYPE_V2DF_V2DF_PTEST:
36525 return ix86_expand_sse_ptest (d, exp, target);
36526 case FLOAT128_FTYPE_FLOAT128:
36527 case FLOAT_FTYPE_FLOAT:
36528 case INT_FTYPE_INT:
36529 case UINT64_FTYPE_INT:
36530 case UINT16_FTYPE_UINT16:
36531 case INT64_FTYPE_INT64:
36532 case INT64_FTYPE_V4SF:
36533 case INT64_FTYPE_V2DF:
36534 case INT_FTYPE_V16QI:
36535 case INT_FTYPE_V8QI:
36536 case INT_FTYPE_V8SF:
36537 case INT_FTYPE_V4DF:
36538 case INT_FTYPE_V4SF:
36539 case INT_FTYPE_V2DF:
36540 case INT_FTYPE_V32QI:
36541 case V16QI_FTYPE_V16QI:
36542 case V8SI_FTYPE_V8SF:
36543 case V8SI_FTYPE_V4SI:
36544 case V8HI_FTYPE_V8HI:
36545 case V8HI_FTYPE_V16QI:
36546 case V8QI_FTYPE_V8QI:
36547 case V8SF_FTYPE_V8SF:
36548 case V8SF_FTYPE_V8SI:
36549 case V8SF_FTYPE_V4SF:
36550 case V8SF_FTYPE_V8HI:
36551 case V4SI_FTYPE_V4SI:
36552 case V4SI_FTYPE_V16QI:
36553 case V4SI_FTYPE_V4SF:
36554 case V4SI_FTYPE_V8SI:
36555 case V4SI_FTYPE_V8HI:
36556 case V4SI_FTYPE_V4DF:
36557 case V4SI_FTYPE_V2DF:
36558 case V4HI_FTYPE_V4HI:
36559 case V4DF_FTYPE_V4DF:
36560 case V4DF_FTYPE_V4SI:
36561 case V4DF_FTYPE_V4SF:
36562 case V4DF_FTYPE_V2DF:
36563 case V4SF_FTYPE_V4SF:
36564 case V4SF_FTYPE_V4SI:
36565 case V4SF_FTYPE_V8SF:
36566 case V4SF_FTYPE_V4DF:
36567 case V4SF_FTYPE_V8HI:
36568 case V4SF_FTYPE_V2DF:
36569 case V2DI_FTYPE_V2DI:
36570 case V2DI_FTYPE_V16QI:
36571 case V2DI_FTYPE_V8HI:
36572 case V2DI_FTYPE_V4SI:
36573 case V2DF_FTYPE_V2DF:
36574 case V2DF_FTYPE_V4SI:
36575 case V2DF_FTYPE_V4DF:
36576 case V2DF_FTYPE_V4SF:
36577 case V2DF_FTYPE_V2SI:
36578 case V2SI_FTYPE_V2SI:
36579 case V2SI_FTYPE_V4SF:
36580 case V2SI_FTYPE_V2SF:
36581 case V2SI_FTYPE_V2DF:
36582 case V2SF_FTYPE_V2SF:
36583 case V2SF_FTYPE_V2SI:
36584 case V32QI_FTYPE_V32QI:
36585 case V32QI_FTYPE_V16QI:
36586 case V16HI_FTYPE_V16HI:
36587 case V16HI_FTYPE_V8HI:
36588 case V8SI_FTYPE_V8SI:
36589 case V16HI_FTYPE_V16QI:
36590 case V8SI_FTYPE_V16QI:
36591 case V4DI_FTYPE_V16QI:
36592 case V8SI_FTYPE_V8HI:
36593 case V4DI_FTYPE_V8HI:
36594 case V4DI_FTYPE_V4SI:
36595 case V4DI_FTYPE_V2DI:
36596 case HI_FTYPE_HI:
36597 case HI_FTYPE_V16QI:
36598 case SI_FTYPE_V32QI:
36599 case DI_FTYPE_V64QI:
36600 case V16QI_FTYPE_HI:
36601 case V32QI_FTYPE_SI:
36602 case V64QI_FTYPE_DI:
36603 case V8HI_FTYPE_QI:
36604 case V16HI_FTYPE_HI:
36605 case V32HI_FTYPE_SI:
36606 case V4SI_FTYPE_QI:
36607 case V8SI_FTYPE_QI:
36608 case V4SI_FTYPE_HI:
36609 case V8SI_FTYPE_HI:
36610 case QI_FTYPE_V8HI:
36611 case HI_FTYPE_V16HI:
36612 case SI_FTYPE_V32HI:
36613 case QI_FTYPE_V4SI:
36614 case QI_FTYPE_V8SI:
36615 case HI_FTYPE_V16SI:
36616 case QI_FTYPE_V2DI:
36617 case QI_FTYPE_V4DI:
36618 case QI_FTYPE_V8DI:
36619 case UINT_FTYPE_V2DF:
36620 case UINT_FTYPE_V4SF:
36621 case UINT64_FTYPE_V2DF:
36622 case UINT64_FTYPE_V4SF:
36623 case V16QI_FTYPE_V8DI:
36624 case V16HI_FTYPE_V16SI:
36625 case V16SI_FTYPE_HI:
36626 case V2DI_FTYPE_QI:
36627 case V4DI_FTYPE_QI:
36628 case V16SI_FTYPE_V16SI:
36629 case V16SI_FTYPE_INT:
36630 case V16SF_FTYPE_FLOAT:
36631 case V16SF_FTYPE_V8SF:
36632 case V16SI_FTYPE_V8SI:
36633 case V16SF_FTYPE_V4SF:
36634 case V16SI_FTYPE_V4SI:
36635 case V16SF_FTYPE_V16SF:
36636 case V8HI_FTYPE_V8DI:
36637 case V8UHI_FTYPE_V8UHI:
36638 case V8SI_FTYPE_V8DI:
36639 case V8SF_FTYPE_V8DF:
36640 case V8DI_FTYPE_QI:
36641 case V8DI_FTYPE_INT64:
36642 case V8DI_FTYPE_V4DI:
36643 case V8DI_FTYPE_V8DI:
36644 case V8DF_FTYPE_DOUBLE:
36645 case V8DF_FTYPE_V4DF:
36646 case V8DF_FTYPE_V2DF:
36647 case V8DF_FTYPE_V8DF:
36648 case V8DF_FTYPE_V8SI:
36649 nargs = 1;
36650 break;
36651 case V4SF_FTYPE_V4SF_VEC_MERGE:
36652 case V2DF_FTYPE_V2DF_VEC_MERGE:
36653 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36654 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36655 case V16QI_FTYPE_V16QI_V16QI:
36656 case V16QI_FTYPE_V8HI_V8HI:
36657 case V16SI_FTYPE_V16SI_V16SI:
36658 case V16SF_FTYPE_V16SF_V16SF:
36659 case V16SF_FTYPE_V16SF_V16SI:
36660 case V8QI_FTYPE_V8QI_V8QI:
36661 case V8QI_FTYPE_V4HI_V4HI:
36662 case V8HI_FTYPE_V8HI_V8HI:
36663 case V8HI_FTYPE_V16QI_V16QI:
36664 case V8HI_FTYPE_V4SI_V4SI:
36665 case V8SF_FTYPE_V8SF_V8SF:
36666 case V8SF_FTYPE_V8SF_V8SI:
36667 case V8DI_FTYPE_V8DI_V8DI:
36668 case V8DF_FTYPE_V8DF_V8DF:
36669 case V8DF_FTYPE_V8DF_V8DI:
36670 case V4SI_FTYPE_V4SI_V4SI:
36671 case V4SI_FTYPE_V8HI_V8HI:
36672 case V4SI_FTYPE_V4SF_V4SF:
36673 case V4SI_FTYPE_V2DF_V2DF:
36674 case V4HI_FTYPE_V4HI_V4HI:
36675 case V4HI_FTYPE_V8QI_V8QI:
36676 case V4HI_FTYPE_V2SI_V2SI:
36677 case V4DF_FTYPE_V4DF_V4DF:
36678 case V4DF_FTYPE_V4DF_V4DI:
36679 case V4SF_FTYPE_V4SF_V4SF:
36680 case V4SF_FTYPE_V4SF_V4SI:
36681 case V4SF_FTYPE_V4SF_V2SI:
36682 case V4SF_FTYPE_V4SF_V2DF:
36683 case V4SF_FTYPE_V4SF_UINT:
36684 case V4SF_FTYPE_V4SF_UINT64:
36685 case V4SF_FTYPE_V4SF_DI:
36686 case V4SF_FTYPE_V4SF_SI:
36687 case V2DI_FTYPE_V2DI_V2DI:
36688 case V2DI_FTYPE_V16QI_V16QI:
36689 case V2DI_FTYPE_V4SI_V4SI:
36690 case V2UDI_FTYPE_V4USI_V4USI:
36691 case V2DI_FTYPE_V2DI_V16QI:
36692 case V2DI_FTYPE_V2DF_V2DF:
36693 case V2SI_FTYPE_V2SI_V2SI:
36694 case V2SI_FTYPE_V4HI_V4HI:
36695 case V2SI_FTYPE_V2SF_V2SF:
36696 case V2DF_FTYPE_V2DF_V2DF:
36697 case V2DF_FTYPE_V2DF_V4SF:
36698 case V2DF_FTYPE_V2DF_V2DI:
36699 case V2DF_FTYPE_V2DF_DI:
36700 case V2DF_FTYPE_V2DF_SI:
36701 case V2DF_FTYPE_V2DF_UINT:
36702 case V2DF_FTYPE_V2DF_UINT64:
36703 case V2SF_FTYPE_V2SF_V2SF:
36704 case V1DI_FTYPE_V1DI_V1DI:
36705 case V1DI_FTYPE_V8QI_V8QI:
36706 case V1DI_FTYPE_V2SI_V2SI:
36707 case V32QI_FTYPE_V16HI_V16HI:
36708 case V16HI_FTYPE_V8SI_V8SI:
36709 case V32QI_FTYPE_V32QI_V32QI:
36710 case V16HI_FTYPE_V32QI_V32QI:
36711 case V16HI_FTYPE_V16HI_V16HI:
36712 case V8SI_FTYPE_V4DF_V4DF:
36713 case V8SI_FTYPE_V8SI_V8SI:
36714 case V8SI_FTYPE_V16HI_V16HI:
36715 case V4DI_FTYPE_V4DI_V4DI:
36716 case V4DI_FTYPE_V8SI_V8SI:
36717 case V4UDI_FTYPE_V8USI_V8USI:
36718 case QI_FTYPE_V8DI_V8DI:
36719 case V8DI_FTYPE_V64QI_V64QI:
36720 case HI_FTYPE_V16SI_V16SI:
36721 if (comparison == UNKNOWN)
36722 return ix86_expand_binop_builtin (icode, exp, target);
36723 nargs = 2;
36724 break;
36725 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36726 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36727 gcc_assert (comparison != UNKNOWN);
36728 nargs = 2;
36729 swap = true;
36730 break;
36731 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36732 case V16HI_FTYPE_V16HI_SI_COUNT:
36733 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36734 case V8SI_FTYPE_V8SI_SI_COUNT:
36735 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36736 case V4DI_FTYPE_V4DI_INT_COUNT:
36737 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36738 case V8HI_FTYPE_V8HI_SI_COUNT:
36739 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36740 case V4SI_FTYPE_V4SI_SI_COUNT:
36741 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36742 case V4HI_FTYPE_V4HI_SI_COUNT:
36743 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36744 case V2DI_FTYPE_V2DI_SI_COUNT:
36745 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36746 case V2SI_FTYPE_V2SI_SI_COUNT:
36747 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36748 case V1DI_FTYPE_V1DI_SI_COUNT:
36749 nargs = 2;
36750 last_arg_count = true;
36751 break;
36752 case UINT64_FTYPE_UINT64_UINT64:
36753 case UINT_FTYPE_UINT_UINT:
36754 case UINT_FTYPE_UINT_USHORT:
36755 case UINT_FTYPE_UINT_UCHAR:
36756 case UINT16_FTYPE_UINT16_INT:
36757 case UINT8_FTYPE_UINT8_INT:
36758 case HI_FTYPE_HI_HI:
36759 case SI_FTYPE_SI_SI:
36760 case DI_FTYPE_DI_DI:
36761 case V16SI_FTYPE_V8DF_V8DF:
36762 nargs = 2;
36763 break;
36764 case V2DI_FTYPE_V2DI_INT_CONVERT:
36765 nargs = 2;
36766 rmode = V1TImode;
36767 nargs_constant = 1;
36768 break;
36769 case V4DI_FTYPE_V4DI_INT_CONVERT:
36770 nargs = 2;
36771 rmode = V2TImode;
36772 nargs_constant = 1;
36773 break;
36774 case V8DI_FTYPE_V8DI_INT_CONVERT:
36775 nargs = 2;
36776 rmode = V4TImode;
36777 nargs_constant = 1;
36778 break;
36779 case V8HI_FTYPE_V8HI_INT:
36780 case V8HI_FTYPE_V8SF_INT:
36781 case V16HI_FTYPE_V16SF_INT:
36782 case V8HI_FTYPE_V4SF_INT:
36783 case V8SF_FTYPE_V8SF_INT:
36784 case V4SF_FTYPE_V16SF_INT:
36785 case V16SF_FTYPE_V16SF_INT:
36786 case V4SI_FTYPE_V4SI_INT:
36787 case V4SI_FTYPE_V8SI_INT:
36788 case V4HI_FTYPE_V4HI_INT:
36789 case V4DF_FTYPE_V4DF_INT:
36790 case V4DF_FTYPE_V8DF_INT:
36791 case V4SF_FTYPE_V4SF_INT:
36792 case V4SF_FTYPE_V8SF_INT:
36793 case V2DI_FTYPE_V2DI_INT:
36794 case V2DF_FTYPE_V2DF_INT:
36795 case V2DF_FTYPE_V4DF_INT:
36796 case V16HI_FTYPE_V16HI_INT:
36797 case V8SI_FTYPE_V8SI_INT:
36798 case V16SI_FTYPE_V16SI_INT:
36799 case V4SI_FTYPE_V16SI_INT:
36800 case V4DI_FTYPE_V4DI_INT:
36801 case V2DI_FTYPE_V4DI_INT:
36802 case V4DI_FTYPE_V8DI_INT:
36803 case HI_FTYPE_HI_INT:
36804 case QI_FTYPE_V4SF_INT:
36805 case QI_FTYPE_V2DF_INT:
36806 nargs = 2;
36807 nargs_constant = 1;
36808 break;
36809 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36810 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36811 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36812 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36813 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36814 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36815 case HI_FTYPE_V16SI_V16SI_HI:
36816 case QI_FTYPE_V8DI_V8DI_QI:
36817 case V16HI_FTYPE_V16SI_V16HI_HI:
36818 case V16QI_FTYPE_V16SI_V16QI_HI:
36819 case V16QI_FTYPE_V8DI_V16QI_QI:
36820 case V16SF_FTYPE_V16SF_V16SF_HI:
36821 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36822 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36823 case V16SF_FTYPE_V16SI_V16SF_HI:
36824 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36825 case V16SF_FTYPE_V4SF_V16SF_HI:
36826 case V16SI_FTYPE_SI_V16SI_HI:
36827 case V16SI_FTYPE_V16HI_V16SI_HI:
36828 case V16SI_FTYPE_V16QI_V16SI_HI:
36829 case V16SI_FTYPE_V16SF_V16SI_HI:
36830 case V8SF_FTYPE_V4SF_V8SF_QI:
36831 case V4DF_FTYPE_V2DF_V4DF_QI:
36832 case V8SI_FTYPE_V4SI_V8SI_QI:
36833 case V8SI_FTYPE_SI_V8SI_QI:
36834 case V4SI_FTYPE_V4SI_V4SI_QI:
36835 case V4SI_FTYPE_SI_V4SI_QI:
36836 case V4DI_FTYPE_V2DI_V4DI_QI:
36837 case V4DI_FTYPE_DI_V4DI_QI:
36838 case V2DI_FTYPE_V2DI_V2DI_QI:
36839 case V2DI_FTYPE_DI_V2DI_QI:
36840 case V64QI_FTYPE_V64QI_V64QI_DI:
36841 case V64QI_FTYPE_V16QI_V64QI_DI:
36842 case V64QI_FTYPE_QI_V64QI_DI:
36843 case V32QI_FTYPE_V32QI_V32QI_SI:
36844 case V32QI_FTYPE_V16QI_V32QI_SI:
36845 case V32QI_FTYPE_QI_V32QI_SI:
36846 case V16QI_FTYPE_V16QI_V16QI_HI:
36847 case V16QI_FTYPE_QI_V16QI_HI:
36848 case V32HI_FTYPE_V8HI_V32HI_SI:
36849 case V32HI_FTYPE_HI_V32HI_SI:
36850 case V16HI_FTYPE_V8HI_V16HI_HI:
36851 case V16HI_FTYPE_HI_V16HI_HI:
36852 case V8HI_FTYPE_V8HI_V8HI_QI:
36853 case V8HI_FTYPE_HI_V8HI_QI:
36854 case V8SF_FTYPE_V8HI_V8SF_QI:
36855 case V4SF_FTYPE_V8HI_V4SF_QI:
36856 case V8SI_FTYPE_V8SF_V8SI_QI:
36857 case V4SI_FTYPE_V4SF_V4SI_QI:
36858 case V8DI_FTYPE_V8SF_V8DI_QI:
36859 case V4DI_FTYPE_V4SF_V4DI_QI:
36860 case V2DI_FTYPE_V4SF_V2DI_QI:
36861 case V8SF_FTYPE_V8DI_V8SF_QI:
36862 case V4SF_FTYPE_V4DI_V4SF_QI:
36863 case V4SF_FTYPE_V2DI_V4SF_QI:
36864 case V8DF_FTYPE_V8DI_V8DF_QI:
36865 case V4DF_FTYPE_V4DI_V4DF_QI:
36866 case V2DF_FTYPE_V2DI_V2DF_QI:
36867 case V16QI_FTYPE_V8HI_V16QI_QI:
36868 case V16QI_FTYPE_V16HI_V16QI_HI:
36869 case V16QI_FTYPE_V4SI_V16QI_QI:
36870 case V16QI_FTYPE_V8SI_V16QI_QI:
36871 case V8HI_FTYPE_V4SI_V8HI_QI:
36872 case V8HI_FTYPE_V8SI_V8HI_QI:
36873 case V16QI_FTYPE_V2DI_V16QI_QI:
36874 case V16QI_FTYPE_V4DI_V16QI_QI:
36875 case V8HI_FTYPE_V2DI_V8HI_QI:
36876 case V8HI_FTYPE_V4DI_V8HI_QI:
36877 case V4SI_FTYPE_V2DI_V4SI_QI:
36878 case V4SI_FTYPE_V4DI_V4SI_QI:
36879 case V32QI_FTYPE_V32HI_V32QI_SI:
36880 case HI_FTYPE_V16QI_V16QI_HI:
36881 case SI_FTYPE_V32QI_V32QI_SI:
36882 case DI_FTYPE_V64QI_V64QI_DI:
36883 case QI_FTYPE_V8HI_V8HI_QI:
36884 case HI_FTYPE_V16HI_V16HI_HI:
36885 case SI_FTYPE_V32HI_V32HI_SI:
36886 case QI_FTYPE_V4SI_V4SI_QI:
36887 case QI_FTYPE_V8SI_V8SI_QI:
36888 case QI_FTYPE_V2DI_V2DI_QI:
36889 case QI_FTYPE_V4DI_V4DI_QI:
36890 case V4SF_FTYPE_V2DF_V4SF_QI:
36891 case V4SF_FTYPE_V4DF_V4SF_QI:
36892 case V16SI_FTYPE_V16SI_V16SI_HI:
36893 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36894 case V16SI_FTYPE_V4SI_V16SI_HI:
36895 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36896 case V2DI_FTYPE_V4SI_V2DI_QI:
36897 case V2DI_FTYPE_V8HI_V2DI_QI:
36898 case V2DI_FTYPE_V16QI_V2DI_QI:
36899 case V4DI_FTYPE_V4DI_V4DI_QI:
36900 case V4DI_FTYPE_V4SI_V4DI_QI:
36901 case V4DI_FTYPE_V8HI_V4DI_QI:
36902 case V4DI_FTYPE_V16QI_V4DI_QI:
36903 case V8DI_FTYPE_V8DF_V8DI_QI:
36904 case V4DI_FTYPE_V4DF_V4DI_QI:
36905 case V2DI_FTYPE_V2DF_V2DI_QI:
36906 case V4SI_FTYPE_V4DF_V4SI_QI:
36907 case V4SI_FTYPE_V2DF_V4SI_QI:
36908 case V4SI_FTYPE_V8HI_V4SI_QI:
36909 case V4SI_FTYPE_V16QI_V4SI_QI:
36910 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36911 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36912 case V8DF_FTYPE_V2DF_V8DF_QI:
36913 case V8DF_FTYPE_V4DF_V8DF_QI:
36914 case V8DF_FTYPE_V8DF_V8DF_QI:
36915 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36916 case V8SF_FTYPE_V8SF_V8SF_QI:
36917 case V8SF_FTYPE_V8SI_V8SF_QI:
36918 case V4DF_FTYPE_V4DF_V4DF_QI:
36919 case V4SF_FTYPE_V4SF_V4SF_QI:
36920 case V2DF_FTYPE_V2DF_V2DF_QI:
36921 case V2DF_FTYPE_V4SF_V2DF_QI:
36922 case V2DF_FTYPE_V4SI_V2DF_QI:
36923 case V4SF_FTYPE_V4SI_V4SF_QI:
36924 case V4DF_FTYPE_V4SF_V4DF_QI:
36925 case V4DF_FTYPE_V4SI_V4DF_QI:
36926 case V8SI_FTYPE_V8SI_V8SI_QI:
36927 case V8SI_FTYPE_V8HI_V8SI_QI:
36928 case V8SI_FTYPE_V16QI_V8SI_QI:
36929 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36930 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36931 case V8DF_FTYPE_V8SF_V8DF_QI:
36932 case V8DF_FTYPE_V8SI_V8DF_QI:
36933 case V8DI_FTYPE_DI_V8DI_QI:
36934 case V16SF_FTYPE_V8SF_V16SF_HI:
36935 case V16SI_FTYPE_V8SI_V16SI_HI:
36936 case V16HI_FTYPE_V16HI_V16HI_HI:
36937 case V8HI_FTYPE_V16QI_V8HI_QI:
36938 case V16HI_FTYPE_V16QI_V16HI_HI:
36939 case V32HI_FTYPE_V32HI_V32HI_SI:
36940 case V32HI_FTYPE_V32QI_V32HI_SI:
36941 case V8DI_FTYPE_V16QI_V8DI_QI:
36942 case V8DI_FTYPE_V2DI_V8DI_QI:
36943 case V8DI_FTYPE_V4DI_V8DI_QI:
36944 case V8DI_FTYPE_V8DI_V8DI_QI:
36945 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36946 case V8DI_FTYPE_V8HI_V8DI_QI:
36947 case V8DI_FTYPE_V8SI_V8DI_QI:
36948 case V8HI_FTYPE_V8DI_V8HI_QI:
36949 case V8SF_FTYPE_V8DF_V8SF_QI:
36950 case V8SI_FTYPE_V8DF_V8SI_QI:
36951 case V8SI_FTYPE_V8DI_V8SI_QI:
36952 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36953 nargs = 3;
36954 break;
36955 case V32QI_FTYPE_V32QI_V32QI_INT:
36956 case V16HI_FTYPE_V16HI_V16HI_INT:
36957 case V16QI_FTYPE_V16QI_V16QI_INT:
36958 case V4DI_FTYPE_V4DI_V4DI_INT:
36959 case V8HI_FTYPE_V8HI_V8HI_INT:
36960 case V8SI_FTYPE_V8SI_V8SI_INT:
36961 case V8SI_FTYPE_V8SI_V4SI_INT:
36962 case V8SF_FTYPE_V8SF_V8SF_INT:
36963 case V8SF_FTYPE_V8SF_V4SF_INT:
36964 case V4SI_FTYPE_V4SI_V4SI_INT:
36965 case V4DF_FTYPE_V4DF_V4DF_INT:
36966 case V16SF_FTYPE_V16SF_V16SF_INT:
36967 case V16SF_FTYPE_V16SF_V4SF_INT:
36968 case V16SI_FTYPE_V16SI_V4SI_INT:
36969 case V4DF_FTYPE_V4DF_V2DF_INT:
36970 case V4SF_FTYPE_V4SF_V4SF_INT:
36971 case V2DI_FTYPE_V2DI_V2DI_INT:
36972 case V4DI_FTYPE_V4DI_V2DI_INT:
36973 case V2DF_FTYPE_V2DF_V2DF_INT:
36974 case QI_FTYPE_V8DI_V8DI_INT:
36975 case QI_FTYPE_V8DF_V8DF_INT:
36976 case QI_FTYPE_V2DF_V2DF_INT:
36977 case QI_FTYPE_V4SF_V4SF_INT:
36978 case HI_FTYPE_V16SI_V16SI_INT:
36979 case HI_FTYPE_V16SF_V16SF_INT:
36980 nargs = 3;
36981 nargs_constant = 1;
36982 break;
36983 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36984 nargs = 3;
36985 rmode = V4DImode;
36986 nargs_constant = 1;
36987 break;
36988 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36989 nargs = 3;
36990 rmode = V2DImode;
36991 nargs_constant = 1;
36992 break;
36993 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36994 nargs = 3;
36995 rmode = DImode;
36996 nargs_constant = 1;
36997 break;
36998 case V2DI_FTYPE_V2DI_UINT_UINT:
36999 nargs = 3;
37000 nargs_constant = 2;
37001 break;
37002 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37003 nargs = 3;
37004 rmode = V8DImode;
37005 nargs_constant = 1;
37006 break;
37007 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37008 nargs = 5;
37009 rmode = V8DImode;
37010 mask_pos = 2;
37011 nargs_constant = 1;
37012 break;
37013 case QI_FTYPE_V8DF_INT_QI:
37014 case QI_FTYPE_V4DF_INT_QI:
37015 case QI_FTYPE_V2DF_INT_QI:
37016 case HI_FTYPE_V16SF_INT_HI:
37017 case QI_FTYPE_V8SF_INT_QI:
37018 case QI_FTYPE_V4SF_INT_QI:
37019 nargs = 3;
37020 mask_pos = 1;
37021 nargs_constant = 1;
37022 break;
37023 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37024 nargs = 5;
37025 rmode = V4DImode;
37026 mask_pos = 2;
37027 nargs_constant = 1;
37028 break;
37029 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37030 nargs = 5;
37031 rmode = V2DImode;
37032 mask_pos = 2;
37033 nargs_constant = 1;
37034 break;
37035 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37036 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37037 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37038 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37039 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37040 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37041 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37042 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37043 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37044 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37045 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37046 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37047 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37048 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37049 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37050 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37051 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37052 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37053 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37054 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37055 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37056 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37057 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37058 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37059 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37060 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37061 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37062 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37063 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37064 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37065 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37066 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37067 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37068 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37069 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37070 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37071 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37072 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37073 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37074 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37075 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37076 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37077 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37078 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37079 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37080 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37081 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37082 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37083 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37084 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37085 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37086 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37087 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37088 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37089 nargs = 4;
37090 break;
37091 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37092 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37093 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37094 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37095 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37096 nargs = 4;
37097 nargs_constant = 1;
37098 break;
37099 case QI_FTYPE_V4DI_V4DI_INT_QI:
37100 case QI_FTYPE_V8SI_V8SI_INT_QI:
37101 case QI_FTYPE_V4DF_V4DF_INT_QI:
37102 case QI_FTYPE_V8SF_V8SF_INT_QI:
37103 case QI_FTYPE_V2DI_V2DI_INT_QI:
37104 case QI_FTYPE_V4SI_V4SI_INT_QI:
37105 case QI_FTYPE_V2DF_V2DF_INT_QI:
37106 case QI_FTYPE_V4SF_V4SF_INT_QI:
37107 case DI_FTYPE_V64QI_V64QI_INT_DI:
37108 case SI_FTYPE_V32QI_V32QI_INT_SI:
37109 case HI_FTYPE_V16QI_V16QI_INT_HI:
37110 case SI_FTYPE_V32HI_V32HI_INT_SI:
37111 case HI_FTYPE_V16HI_V16HI_INT_HI:
37112 case QI_FTYPE_V8HI_V8HI_INT_QI:
37113 nargs = 4;
37114 mask_pos = 1;
37115 nargs_constant = 1;
37116 break;
37117 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37118 nargs = 4;
37119 nargs_constant = 2;
37120 break;
37121 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37122 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37123 nargs = 4;
37124 break;
37125 case QI_FTYPE_V8DI_V8DI_INT_QI:
37126 case HI_FTYPE_V16SI_V16SI_INT_HI:
37127 case QI_FTYPE_V8DF_V8DF_INT_QI:
37128 case HI_FTYPE_V16SF_V16SF_INT_HI:
37129 mask_pos = 1;
37130 nargs = 4;
37131 nargs_constant = 1;
37132 break;
37133 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37134 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37135 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37136 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37137 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37138 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37139 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37140 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37141 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37142 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37143 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37144 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37145 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37146 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37147 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37148 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37149 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37150 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37151 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37152 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37153 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37154 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37155 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37156 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37157 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37158 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37159 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37160 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37161 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37162 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37163 nargs = 4;
37164 mask_pos = 2;
37165 nargs_constant = 1;
37166 break;
37167 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37168 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37169 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37170 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37171 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37172 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37173 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37174 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37175 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37176 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37177 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37178 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37179 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37180 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37181 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37182 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37183 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37184 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37185 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37186 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37187 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37188 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37189 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37190 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37191 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37192 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37193 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37194 nargs = 5;
37195 mask_pos = 2;
37196 nargs_constant = 1;
37197 break;
37198 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37199 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37200 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37201 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37202 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37203 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37204 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37205 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37206 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37207 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37208 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37209 nargs = 5;
37211 mask_pos = 1;
37212 nargs_constant = 1;
37213 break;
37215 default:
37216 gcc_unreachable ();
37219 gcc_assert (nargs <= ARRAY_SIZE (args));
37221 if (comparison != UNKNOWN)
37223 gcc_assert (nargs == 2);
37224 return ix86_expand_sse_compare (d, exp, target, swap);
37227 if (rmode == VOIDmode || rmode == tmode)
37229 if (optimize
37230 || target == 0
37231 || GET_MODE (target) != tmode
37232 || !insn_p->operand[0].predicate (target, tmode))
37233 target = gen_reg_rtx (tmode);
37234 real_target = target;
37236 else
37238 real_target = gen_reg_rtx (tmode);
37239 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37242 for (i = 0; i < nargs; i++)
37244 tree arg = CALL_EXPR_ARG (exp, i);
37245 rtx op = expand_normal (arg);
37246 machine_mode mode = insn_p->operand[i + 1].mode;
37247 bool match = insn_p->operand[i + 1].predicate (op, mode);
37249 if (last_arg_count && (i + 1) == nargs)
37251 /* SIMD shift insns take either an 8-bit immediate or
37252 register as count. But builtin functions take int as
37253 count. If count doesn't match, we put it in register. */
37254 if (!match)
37256 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37257 if (!insn_p->operand[i + 1].predicate (op, mode))
37258 op = copy_to_reg (op);
37261 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37262 (!mask_pos && (nargs - i) <= nargs_constant))
37264 if (!match)
37265 switch (icode)
37267 case CODE_FOR_avx_vinsertf128v4di:
37268 case CODE_FOR_avx_vextractf128v4di:
37269 error ("the last argument must be an 1-bit immediate");
37270 return const0_rtx;
37272 case CODE_FOR_avx512f_cmpv8di3_mask:
37273 case CODE_FOR_avx512f_cmpv16si3_mask:
37274 case CODE_FOR_avx512f_ucmpv8di3_mask:
37275 case CODE_FOR_avx512f_ucmpv16si3_mask:
37276 case CODE_FOR_avx512vl_cmpv4di3_mask:
37277 case CODE_FOR_avx512vl_cmpv8si3_mask:
37278 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37279 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37280 case CODE_FOR_avx512vl_cmpv2di3_mask:
37281 case CODE_FOR_avx512vl_cmpv4si3_mask:
37282 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37283 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37284 error ("the last argument must be a 3-bit immediate");
37285 return const0_rtx;
37287 case CODE_FOR_sse4_1_roundsd:
37288 case CODE_FOR_sse4_1_roundss:
37290 case CODE_FOR_sse4_1_roundpd:
37291 case CODE_FOR_sse4_1_roundps:
37292 case CODE_FOR_avx_roundpd256:
37293 case CODE_FOR_avx_roundps256:
37295 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37296 case CODE_FOR_sse4_1_roundps_sfix:
37297 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37298 case CODE_FOR_avx_roundps_sfix256:
37300 case CODE_FOR_sse4_1_blendps:
37301 case CODE_FOR_avx_blendpd256:
37302 case CODE_FOR_avx_vpermilv4df:
37303 case CODE_FOR_avx_vpermilv4df_mask:
37304 case CODE_FOR_avx512f_getmantv8df_mask:
37305 case CODE_FOR_avx512f_getmantv16sf_mask:
37306 case CODE_FOR_avx512vl_getmantv8sf_mask:
37307 case CODE_FOR_avx512vl_getmantv4df_mask:
37308 case CODE_FOR_avx512vl_getmantv4sf_mask:
37309 case CODE_FOR_avx512vl_getmantv2df_mask:
37310 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37311 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37312 case CODE_FOR_avx512dq_rangepv4df_mask:
37313 case CODE_FOR_avx512dq_rangepv8sf_mask:
37314 case CODE_FOR_avx512dq_rangepv2df_mask:
37315 case CODE_FOR_avx512dq_rangepv4sf_mask:
37316 case CODE_FOR_avx_shufpd256_mask:
37317 error ("the last argument must be a 4-bit immediate");
37318 return const0_rtx;
37320 case CODE_FOR_sha1rnds4:
37321 case CODE_FOR_sse4_1_blendpd:
37322 case CODE_FOR_avx_vpermilv2df:
37323 case CODE_FOR_avx_vpermilv2df_mask:
37324 case CODE_FOR_xop_vpermil2v2df3:
37325 case CODE_FOR_xop_vpermil2v4sf3:
37326 case CODE_FOR_xop_vpermil2v4df3:
37327 case CODE_FOR_xop_vpermil2v8sf3:
37328 case CODE_FOR_avx512f_vinsertf32x4_mask:
37329 case CODE_FOR_avx512f_vinserti32x4_mask:
37330 case CODE_FOR_avx512f_vextractf32x4_mask:
37331 case CODE_FOR_avx512f_vextracti32x4_mask:
37332 case CODE_FOR_sse2_shufpd:
37333 case CODE_FOR_sse2_shufpd_mask:
37334 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37335 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37336 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37337 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37338 error ("the last argument must be a 2-bit immediate");
37339 return const0_rtx;
37341 case CODE_FOR_avx_vextractf128v4df:
37342 case CODE_FOR_avx_vextractf128v8sf:
37343 case CODE_FOR_avx_vextractf128v8si:
37344 case CODE_FOR_avx_vinsertf128v4df:
37345 case CODE_FOR_avx_vinsertf128v8sf:
37346 case CODE_FOR_avx_vinsertf128v8si:
37347 case CODE_FOR_avx512f_vinsertf64x4_mask:
37348 case CODE_FOR_avx512f_vinserti64x4_mask:
37349 case CODE_FOR_avx512f_vextractf64x4_mask:
37350 case CODE_FOR_avx512f_vextracti64x4_mask:
37351 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37352 case CODE_FOR_avx512dq_vinserti32x8_mask:
37353 case CODE_FOR_avx512vl_vinsertv4df:
37354 case CODE_FOR_avx512vl_vinsertv4di:
37355 case CODE_FOR_avx512vl_vinsertv8sf:
37356 case CODE_FOR_avx512vl_vinsertv8si:
37357 error ("the last argument must be a 1-bit immediate");
37358 return const0_rtx;
37360 case CODE_FOR_avx_vmcmpv2df3:
37361 case CODE_FOR_avx_vmcmpv4sf3:
37362 case CODE_FOR_avx_cmpv2df3:
37363 case CODE_FOR_avx_cmpv4sf3:
37364 case CODE_FOR_avx_cmpv4df3:
37365 case CODE_FOR_avx_cmpv8sf3:
37366 case CODE_FOR_avx512f_cmpv8df3_mask:
37367 case CODE_FOR_avx512f_cmpv16sf3_mask:
37368 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37369 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37370 error ("the last argument must be a 5-bit immediate");
37371 return const0_rtx;
37373 default:
37374 switch (nargs_constant)
37376 case 2:
37377 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37378 (!mask_pos && (nargs - i) == nargs_constant))
37380 error ("the next to last argument must be an 8-bit immediate");
37381 break;
37383 case 1:
37384 error ("the last argument must be an 8-bit immediate");
37385 break;
37386 default:
37387 gcc_unreachable ();
37389 return const0_rtx;
37392 else
37394 if (VECTOR_MODE_P (mode))
37395 op = safe_vector_operand (op, mode);
37397 /* If we aren't optimizing, only allow one memory operand to
37398 be generated. */
37399 if (memory_operand (op, mode))
37400 num_memory++;
37402 op = fixup_modeless_constant (op, mode);
37404 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37406 if (optimize || !match || num_memory > 1)
37407 op = copy_to_mode_reg (mode, op);
37409 else
37411 op = copy_to_reg (op);
37412 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37416 args[i].op = op;
37417 args[i].mode = mode;
37420 switch (nargs)
37422 case 1:
37423 pat = GEN_FCN (icode) (real_target, args[0].op);
37424 break;
37425 case 2:
37426 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37427 break;
37428 case 3:
37429 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37430 args[2].op);
37431 break;
37432 case 4:
37433 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37434 args[2].op, args[3].op);
37435 break;
37436 case 5:
37437 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37438 args[2].op, args[3].op, args[4].op);
break;
37439 case 6:
37440 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37441 args[2].op, args[3].op, args[4].op,
37442 args[5].op);
37443 break;
37444 default:
37445 gcc_unreachable ();
37448 if (! pat)
37449 return 0;
37451 emit_insn (pat);
37452 return target;
37455 /* Transform pattern of following layout:
37456 (parallel [
37457 set (A B)
37458 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37460 into:
37461 (set (A B))
37463 Or:
37464 (parallel [ A B
37465 ...
37466 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37467 ...
37468 ])
37469 into:
37470 (parallel [ A B ... ]) */
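/* (Used below to strip the UNSPEC_EMBEDDED_ROUNDING marker again whenever
   the rounding operand turns out to be NO_ROUND.)  */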
37472 static rtx
37473 ix86_erase_embedded_rounding (rtx pat)
37475 if (GET_CODE (pat) == INSN)
37476 pat = PATTERN (pat);
37478 gcc_assert (GET_CODE (pat) == PARALLEL);
37480 if (XVECLEN (pat, 0) == 2)
37482 rtx p0 = XVECEXP (pat, 0, 0);
37483 rtx p1 = XVECEXP (pat, 0, 1);
37485 gcc_assert (GET_CODE (p0) == SET
37486 && GET_CODE (p1) == UNSPEC
37487 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37489 return p0;
37491 else
37493 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37494 int i = 0;
37495 int j = 0;
37497 for (; i < XVECLEN (pat, 0); ++i)
37499 rtx elem = XVECEXP (pat, 0, i);
37500 if (GET_CODE (elem) != UNSPEC
37501 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37502 res [j++] = elem;
37505 /* No more than 1 occurrence was removed. */
37506 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37508 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37512 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37513 with rounding. */
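/* Note (informal): the third call argument selects one of the 32 _CMP_*
   comparison predicates and is translated to an rtx comparison code by
   the table below; predicates that need an unordered compare switch the
   icode to the ucomi variant, and the fourth argument is the SAE/rounding
   control.  */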
37514 static rtx
37515 ix86_expand_sse_comi_round (const struct builtin_description *d,
37516 tree exp, rtx target)
37518 rtx pat, set_dst;
37519 tree arg0 = CALL_EXPR_ARG (exp, 0);
37520 tree arg1 = CALL_EXPR_ARG (exp, 1);
37521 tree arg2 = CALL_EXPR_ARG (exp, 2);
37522 tree arg3 = CALL_EXPR_ARG (exp, 3);
37523 rtx op0 = expand_normal (arg0);
37524 rtx op1 = expand_normal (arg1);
37525 rtx op2 = expand_normal (arg2);
37526 rtx op3 = expand_normal (arg3);
37527 enum insn_code icode = d->icode;
37528 const struct insn_data_d *insn_p = &insn_data[icode];
37529 machine_mode mode0 = insn_p->operand[0].mode;
37530 machine_mode mode1 = insn_p->operand[1].mode;
37531 enum rtx_code comparison = UNEQ;
37532 bool need_ucomi = false;
37534 /* See avxintrin.h for values. */
37535 enum rtx_code comi_comparisons[32] =
37537 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37538 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37539 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37541 bool need_ucomi_values[32] =
37543 true, false, false, true, true, false, false, true,
37544 true, false, false, true, true, false, false, true,
37545 false, true, true, false, false, true, true, false,
37546 false, true, true, false, false, true, true, false
37549 if (!CONST_INT_P (op2))
37551 error ("the third argument must be comparison constant");
37552 return const0_rtx;
37554 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37556 error ("incorrect comparison mode");
37557 return const0_rtx;
37560 if (!insn_p->operand[2].predicate (op3, SImode))
37562 error ("incorrect rounding operand");
37563 return const0_rtx;
37566 comparison = comi_comparisons[INTVAL (op2)];
37567 need_ucomi = need_ucomi_values[INTVAL (op2)];
37569 if (VECTOR_MODE_P (mode0))
37570 op0 = safe_vector_operand (op0, mode0);
37571 if (VECTOR_MODE_P (mode1))
37572 op1 = safe_vector_operand (op1, mode1);
37574 target = gen_reg_rtx (SImode);
37575 emit_move_insn (target, const0_rtx);
37576 target = gen_rtx_SUBREG (QImode, target, 0);
37578 if ((optimize && !register_operand (op0, mode0))
37579 || !insn_p->operand[0].predicate (op0, mode0))
37580 op0 = copy_to_mode_reg (mode0, op0);
37581 if ((optimize && !register_operand (op1, mode1))
37582 || !insn_p->operand[1].predicate (op1, mode1))
37583 op1 = copy_to_mode_reg (mode1, op1);
37585 if (need_ucomi)
37586 icode = icode == CODE_FOR_sse_comi_round
37587 ? CODE_FOR_sse_ucomi_round
37588 : CODE_FOR_sse2_ucomi_round;
37590 pat = GEN_FCN (icode) (op0, op1, op3);
37591 if (! pat)
37592 return 0;
37594 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37595 if (INTVAL (op3) == NO_ROUND)
37597 pat = ix86_erase_embedded_rounding (pat);
37598 if (! pat)
37599 return 0;
37601 set_dst = SET_DEST (pat);
37603 else
37605 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37606 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37609 emit_insn (pat);
37610 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37611 gen_rtx_fmt_ee (comparison, QImode,
37612 set_dst,
37613 const0_rtx)));
37615 return SUBREG_REG (target);
37618 static rtx
37619 ix86_expand_round_builtin (const struct builtin_description *d,
37620 tree exp, rtx target)
37622 rtx pat;
37623 unsigned int i, nargs;
37624 struct
37626 rtx op;
37627 machine_mode mode;
37628 } args[6];
37629 enum insn_code icode = d->icode;
37630 const struct insn_data_d *insn_p = &insn_data[icode];
37631 machine_mode tmode = insn_p->operand[0].mode;
37632 unsigned int nargs_constant = 0;
37633 unsigned int redundant_embed_rnd = 0;
37635 switch ((enum ix86_builtin_func_type) d->flag)
37637 case UINT64_FTYPE_V2DF_INT:
37638 case UINT64_FTYPE_V4SF_INT:
37639 case UINT_FTYPE_V2DF_INT:
37640 case UINT_FTYPE_V4SF_INT:
37641 case INT64_FTYPE_V2DF_INT:
37642 case INT64_FTYPE_V4SF_INT:
37643 case INT_FTYPE_V2DF_INT:
37644 case INT_FTYPE_V4SF_INT:
37645 nargs = 2;
37646 break;
37647 case V4SF_FTYPE_V4SF_UINT_INT:
37648 case V4SF_FTYPE_V4SF_UINT64_INT:
37649 case V2DF_FTYPE_V2DF_UINT64_INT:
37650 case V4SF_FTYPE_V4SF_INT_INT:
37651 case V4SF_FTYPE_V4SF_INT64_INT:
37652 case V2DF_FTYPE_V2DF_INT64_INT:
37653 case V4SF_FTYPE_V4SF_V4SF_INT:
37654 case V2DF_FTYPE_V2DF_V2DF_INT:
37655 case V4SF_FTYPE_V4SF_V2DF_INT:
37656 case V2DF_FTYPE_V2DF_V4SF_INT:
37657 nargs = 3;
37658 break;
37659 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37660 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37661 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37662 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37663 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37664 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37665 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37666 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37667 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37668 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37669 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37670 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37671 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37672 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37673 nargs = 4;
37674 break;
37675 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37676 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37677 nargs_constant = 2;
37678 nargs = 4;
37679 break;
37680 case INT_FTYPE_V4SF_V4SF_INT_INT:
37681 case INT_FTYPE_V2DF_V2DF_INT_INT:
37682 return ix86_expand_sse_comi_round (d, exp, target);
37683 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37684 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37685 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37686 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37687 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37688 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37689 nargs = 5;
37690 break;
37691 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37692 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37693 nargs_constant = 4;
37694 nargs = 5;
37695 break;
37696 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37697 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37698 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37699 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37700 nargs_constant = 3;
37701 nargs = 5;
37702 break;
37703 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37704 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37705 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37706 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37707 nargs = 6;
37708 nargs_constant = 4;
37709 break;
37710 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37711 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37712 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37713 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37714 nargs = 6;
37715 nargs_constant = 3;
37716 break;
37717 default:
37718 gcc_unreachable ();
37720 gcc_assert (nargs <= ARRAY_SIZE (args));
37722 if (optimize
37723 || target == 0
37724 || GET_MODE (target) != tmode
37725 || !insn_p->operand[0].predicate (target, tmode))
37726 target = gen_reg_rtx (tmode);
37728 for (i = 0; i < nargs; i++)
37730 tree arg = CALL_EXPR_ARG (exp, i);
37731 rtx op = expand_normal (arg);
37732 machine_mode mode = insn_p->operand[i + 1].mode;
37733 bool match = insn_p->operand[i + 1].predicate (op, mode);
37735 if (i == nargs - nargs_constant)
37737 if (!match)
37739 switch (icode)
37741 case CODE_FOR_avx512f_getmantv8df_mask_round:
37742 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37743 case CODE_FOR_avx512f_vgetmantv2df_round:
37744 case CODE_FOR_avx512f_vgetmantv4sf_round:
37745 error ("the immediate argument must be a 4-bit immediate");
37746 return const0_rtx;
37747 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37748 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37749 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37750 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37751 error ("the immediate argument must be a 5-bit immediate");
37752 return const0_rtx;
37753 default:
37754 error ("the immediate argument must be an 8-bit immediate");
37755 return const0_rtx;
37759 else if (i == nargs-1)
37761 if (!insn_p->operand[nargs].predicate (op, SImode))
37763 error ("incorrect rounding operand");
37764 return const0_rtx;
37767 /* If there is no rounding, use the normal version of the pattern. */
37768 if (INTVAL (op) == NO_ROUND)
37769 redundant_embed_rnd = 1;
37771 else
37773 if (VECTOR_MODE_P (mode))
37774 op = safe_vector_operand (op, mode);
37776 op = fixup_modeless_constant (op, mode);
37778 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37780 if (optimize || !match)
37781 op = copy_to_mode_reg (mode, op);
37783 else
37785 op = copy_to_reg (op);
37786 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37790 args[i].op = op;
37791 args[i].mode = mode;
37794 switch (nargs)
37796 case 1:
37797 pat = GEN_FCN (icode) (target, args[0].op);
37798 break;
37799 case 2:
37800 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37801 break;
37802 case 3:
37803 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37804 args[2].op);
37805 break;
37806 case 4:
37807 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37808 args[2].op, args[3].op);
37809 break;
37810 case 5:
37811 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37812 args[2].op, args[3].op, args[4].op);
break;
37813 case 6:
37814 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37815 args[2].op, args[3].op, args[4].op,
37816 args[5].op);
37817 break;
37818 default:
37819 gcc_unreachable ();
37822 if (!pat)
37823 return 0;
37825 if (redundant_embed_rnd)
37826 pat = ix86_erase_embedded_rounding (pat);
37828 emit_insn (pat);
37829 return target;
37832 /* Subroutine of ix86_expand_builtin to take care of special insns
37833 with variable number of operands. */
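/* Note (informal): the "special" builtins handled here are the load/store
   style ones.  KLASS records whether the builtin is a load or a store,
   MEMORY is the index of the argument to treat as a memory operand
   (ARRAY_SIZE (args) when no loaded argument is one, e.g. when the memory
   operand is the store target itself), and ALIGNED_MEM marks non-temporal
   and masked load/store patterns that require strictly aligned memory.  */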
37835 static rtx
37836 ix86_expand_special_args_builtin (const struct builtin_description *d,
37837 tree exp, rtx target)
37839 tree arg;
37840 rtx pat, op;
37841 unsigned int i, nargs, arg_adjust, memory;
37842 bool aligned_mem = false;
37843 struct
37845 rtx op;
37846 machine_mode mode;
37847 } args[3];
37848 enum insn_code icode = d->icode;
37849 bool last_arg_constant = false;
37850 const struct insn_data_d *insn_p = &insn_data[icode];
37851 machine_mode tmode = insn_p->operand[0].mode;
37852 enum { load, store } klass;
37854 switch ((enum ix86_builtin_func_type) d->flag)
37856 case VOID_FTYPE_VOID:
37857 emit_insn (GEN_FCN (icode) (target));
37858 return 0;
37859 case VOID_FTYPE_UINT64:
37860 case VOID_FTYPE_UNSIGNED:
37861 nargs = 0;
37862 klass = store;
37863 memory = 0;
37864 break;
37866 case INT_FTYPE_VOID:
37867 case USHORT_FTYPE_VOID:
37868 case UINT64_FTYPE_VOID:
37869 case UNSIGNED_FTYPE_VOID:
37870 nargs = 0;
37871 klass = load;
37872 memory = 0;
37873 break;
37874 case UINT64_FTYPE_PUNSIGNED:
37875 case V2DI_FTYPE_PV2DI:
37876 case V4DI_FTYPE_PV4DI:
37877 case V32QI_FTYPE_PCCHAR:
37878 case V16QI_FTYPE_PCCHAR:
37879 case V8SF_FTYPE_PCV4SF:
37880 case V8SF_FTYPE_PCFLOAT:
37881 case V4SF_FTYPE_PCFLOAT:
37882 case V4DF_FTYPE_PCV2DF:
37883 case V4DF_FTYPE_PCDOUBLE:
37884 case V2DF_FTYPE_PCDOUBLE:
37885 case VOID_FTYPE_PVOID:
37886 case V16SI_FTYPE_PV4SI:
37887 case V16SF_FTYPE_PV4SF:
37888 case V8DI_FTYPE_PV4DI:
37889 case V8DI_FTYPE_PV8DI:
37890 case V8DF_FTYPE_PV4DF:
37891 nargs = 1;
37892 klass = load;
37893 memory = 0;
37894 switch (icode)
37896 case CODE_FOR_sse4_1_movntdqa:
37897 case CODE_FOR_avx2_movntdqa:
37898 case CODE_FOR_avx512f_movntdqa:
37899 aligned_mem = true;
37900 break;
37901 default:
37902 break;
37904 break;
37905 case VOID_FTYPE_PV2SF_V4SF:
37906 case VOID_FTYPE_PV8DI_V8DI:
37907 case VOID_FTYPE_PV4DI_V4DI:
37908 case VOID_FTYPE_PV2DI_V2DI:
37909 case VOID_FTYPE_PCHAR_V32QI:
37910 case VOID_FTYPE_PCHAR_V16QI:
37911 case VOID_FTYPE_PFLOAT_V16SF:
37912 case VOID_FTYPE_PFLOAT_V8SF:
37913 case VOID_FTYPE_PFLOAT_V4SF:
37914 case VOID_FTYPE_PDOUBLE_V8DF:
37915 case VOID_FTYPE_PDOUBLE_V4DF:
37916 case VOID_FTYPE_PDOUBLE_V2DF:
37917 case VOID_FTYPE_PLONGLONG_LONGLONG:
37918 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37919 case VOID_FTYPE_PINT_INT:
37920 nargs = 1;
37921 klass = store;
37922 /* Reserve memory operand for target. */
37923 memory = ARRAY_SIZE (args);
37924 switch (icode)
37926 /* These builtins and instructions require the memory
37927 to be properly aligned. */
37928 case CODE_FOR_avx_movntv4di:
37929 case CODE_FOR_sse2_movntv2di:
37930 case CODE_FOR_avx_movntv8sf:
37931 case CODE_FOR_sse_movntv4sf:
37932 case CODE_FOR_sse4a_vmmovntv4sf:
37933 case CODE_FOR_avx_movntv4df:
37934 case CODE_FOR_sse2_movntv2df:
37935 case CODE_FOR_sse4a_vmmovntv2df:
37936 case CODE_FOR_sse2_movntidi:
37937 case CODE_FOR_sse_movntq:
37938 case CODE_FOR_sse2_movntisi:
37939 case CODE_FOR_avx512f_movntv16sf:
37940 case CODE_FOR_avx512f_movntv8df:
37941 case CODE_FOR_avx512f_movntv8di:
37942 aligned_mem = true;
37943 break;
37944 default:
37945 break;
37947 break;
37948 case V4SF_FTYPE_V4SF_PCV2SF:
37949 case V2DF_FTYPE_V2DF_PCDOUBLE:
37950 nargs = 2;
37951 klass = load;
37952 memory = 1;
37953 break;
37954 case V8SF_FTYPE_PCV8SF_V8SI:
37955 case V4DF_FTYPE_PCV4DF_V4DI:
37956 case V4SF_FTYPE_PCV4SF_V4SI:
37957 case V2DF_FTYPE_PCV2DF_V2DI:
37958 case V8SI_FTYPE_PCV8SI_V8SI:
37959 case V4DI_FTYPE_PCV4DI_V4DI:
37960 case V4SI_FTYPE_PCV4SI_V4SI:
37961 case V2DI_FTYPE_PCV2DI_V2DI:
37962 nargs = 2;
37963 klass = load;
37964 memory = 0;
37965 break;
37966 case VOID_FTYPE_PV8DF_V8DF_QI:
37967 case VOID_FTYPE_PV16SF_V16SF_HI:
37968 case VOID_FTYPE_PV8DI_V8DI_QI:
37969 case VOID_FTYPE_PV4DI_V4DI_QI:
37970 case VOID_FTYPE_PV2DI_V2DI_QI:
37971 case VOID_FTYPE_PV16SI_V16SI_HI:
37972 case VOID_FTYPE_PV8SI_V8SI_QI:
37973 case VOID_FTYPE_PV4SI_V4SI_QI:
37974 switch (icode)
37976 /* These builtins and instructions require the memory
37977 to be properly aligned. */
37978 case CODE_FOR_avx512f_storev16sf_mask:
37979 case CODE_FOR_avx512f_storev16si_mask:
37980 case CODE_FOR_avx512f_storev8df_mask:
37981 case CODE_FOR_avx512f_storev8di_mask:
37982 case CODE_FOR_avx512vl_storev8sf_mask:
37983 case CODE_FOR_avx512vl_storev8si_mask:
37984 case CODE_FOR_avx512vl_storev4df_mask:
37985 case CODE_FOR_avx512vl_storev4di_mask:
37986 case CODE_FOR_avx512vl_storev4sf_mask:
37987 case CODE_FOR_avx512vl_storev4si_mask:
37988 case CODE_FOR_avx512vl_storev2df_mask:
37989 case CODE_FOR_avx512vl_storev2di_mask:
37990 aligned_mem = true;
37991 break;
37992 default:
37993 break;
37995 /* FALLTHRU */
37996 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37997 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37998 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37999 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38000 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38001 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38002 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38003 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38004 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38005 case VOID_FTYPE_PFLOAT_V4SF_QI:
38006 case VOID_FTYPE_PV8SI_V8DI_QI:
38007 case VOID_FTYPE_PV8HI_V8DI_QI:
38008 case VOID_FTYPE_PV16HI_V16SI_HI:
38009 case VOID_FTYPE_PV16QI_V8DI_QI:
38010 case VOID_FTYPE_PV16QI_V16SI_HI:
38011 case VOID_FTYPE_PV4SI_V4DI_QI:
38012 case VOID_FTYPE_PV4SI_V2DI_QI:
38013 case VOID_FTYPE_PV8HI_V4DI_QI:
38014 case VOID_FTYPE_PV8HI_V2DI_QI:
38015 case VOID_FTYPE_PV8HI_V8SI_QI:
38016 case VOID_FTYPE_PV8HI_V4SI_QI:
38017 case VOID_FTYPE_PV16QI_V4DI_QI:
38018 case VOID_FTYPE_PV16QI_V2DI_QI:
38019 case VOID_FTYPE_PV16QI_V8SI_QI:
38020 case VOID_FTYPE_PV16QI_V4SI_QI:
38021 case VOID_FTYPE_PV8HI_V8HI_QI:
38022 case VOID_FTYPE_PV16HI_V16HI_HI:
38023 case VOID_FTYPE_PV32HI_V32HI_SI:
38024 case VOID_FTYPE_PV16QI_V16QI_HI:
38025 case VOID_FTYPE_PV32QI_V32QI_SI:
38026 case VOID_FTYPE_PV64QI_V64QI_DI:
38027 case VOID_FTYPE_PV4DF_V4DF_QI:
38028 case VOID_FTYPE_PV2DF_V2DF_QI:
38029 case VOID_FTYPE_PV8SF_V8SF_QI:
38030 case VOID_FTYPE_PV4SF_V4SF_QI:
38031 nargs = 2;
38032 klass = store;
38033 /* Reserve memory operand for target. */
38034 memory = ARRAY_SIZE (args);
38035 break;
38036 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38037 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38038 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38039 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38040 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38041 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38042 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38043 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38044 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38045 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38046 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38047 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38048 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38049 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38050 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38051 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38052 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38053 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38054 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38055 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38056 nargs = 3;
38057 klass = load;
38058 memory = 0;
38059 switch (icode)
38061 /* These builtins and instructions require the memory
38062 to be properly aligned. */
38063 case CODE_FOR_avx512f_loadv16sf_mask:
38064 case CODE_FOR_avx512f_loadv16si_mask:
38065 case CODE_FOR_avx512f_loadv8df_mask:
38066 case CODE_FOR_avx512f_loadv8di_mask:
38067 case CODE_FOR_avx512vl_loadv8sf_mask:
38068 case CODE_FOR_avx512vl_loadv8si_mask:
38069 case CODE_FOR_avx512vl_loadv4df_mask:
38070 case CODE_FOR_avx512vl_loadv4di_mask:
38071 case CODE_FOR_avx512vl_loadv4sf_mask:
38072 case CODE_FOR_avx512vl_loadv4si_mask:
38073 case CODE_FOR_avx512vl_loadv2df_mask:
38074 case CODE_FOR_avx512vl_loadv2di_mask:
38075 case CODE_FOR_avx512bw_loadv64qi_mask:
38076 case CODE_FOR_avx512vl_loadv32qi_mask:
38077 case CODE_FOR_avx512vl_loadv16qi_mask:
38078 case CODE_FOR_avx512bw_loadv32hi_mask:
38079 case CODE_FOR_avx512vl_loadv16hi_mask:
38080 case CODE_FOR_avx512vl_loadv8hi_mask:
38081 aligned_mem = true;
38082 break;
38083 default:
38084 break;
38086 break;
38087 case VOID_FTYPE_UINT_UINT_UINT:
38088 case VOID_FTYPE_UINT64_UINT_UINT:
38089 case UCHAR_FTYPE_UINT_UINT_UINT:
38090 case UCHAR_FTYPE_UINT64_UINT_UINT:
38091 nargs = 3;
38092 klass = load;
38093 memory = ARRAY_SIZE (args);
38094 last_arg_constant = true;
38095 break;
38096 default:
38097 gcc_unreachable ();
38100 gcc_assert (nargs <= ARRAY_SIZE (args));
38102 if (klass == store)
38104 arg = CALL_EXPR_ARG (exp, 0);
38105 op = expand_normal (arg);
38106 gcc_assert (target == 0);
38107 if (memory)
38109 op = ix86_zero_extend_to_Pmode (op);
38110 target = gen_rtx_MEM (tmode, op);
38111 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38112 on it. Try to improve it using get_pointer_alignment,
38113 and if the special builtin is one that requires strict
38114 mode alignment, also from its GET_MODE_ALIGNMENT.
38115 Failure to do so could lead to ix86_legitimate_combined_insn
38116 rejecting all changes to such insns. */
38117 unsigned int align = get_pointer_alignment (arg);
38118 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38119 align = GET_MODE_ALIGNMENT (tmode);
38120 if (MEM_ALIGN (target) < align)
38121 set_mem_align (target, align);
38123 else
38124 target = force_reg (tmode, op);
38125 arg_adjust = 1;
38127 else
38129 arg_adjust = 0;
38130 if (optimize
38131 || target == 0
38132 || !register_operand (target, tmode)
38133 || GET_MODE (target) != tmode)
38134 target = gen_reg_rtx (tmode);
38137 for (i = 0; i < nargs; i++)
38139 machine_mode mode = insn_p->operand[i + 1].mode;
38140 bool match;
38142 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38143 op = expand_normal (arg);
38144 match = insn_p->operand[i + 1].predicate (op, mode);
38146 if (last_arg_constant && (i + 1) == nargs)
38148 if (!match)
38150 if (icode == CODE_FOR_lwp_lwpvalsi3
38151 || icode == CODE_FOR_lwp_lwpinssi3
38152 || icode == CODE_FOR_lwp_lwpvaldi3
38153 || icode == CODE_FOR_lwp_lwpinsdi3)
38154 error ("the last argument must be a 32-bit immediate");
38155 else
38156 error ("the last argument must be an 8-bit immediate");
38157 return const0_rtx;
38160 else
38162 if (i == memory)
38164 /* This must be the memory operand. */
38165 op = ix86_zero_extend_to_Pmode (op);
38166 op = gen_rtx_MEM (mode, op);
38167 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38168 on it. Try to improve it using get_pointer_alignment,
38169 and if the special builtin is one that requires strict
38170 mode alignment, also from its GET_MODE_ALIGNMENT.
38171 Failure to do so could lead to ix86_legitimate_combined_insn
38172 rejecting all changes to such insns. */
38173 unsigned int align = get_pointer_alignment (arg);
38174 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38175 align = GET_MODE_ALIGNMENT (mode);
38176 if (MEM_ALIGN (op) < align)
38177 set_mem_align (op, align);
38179 else
38181 /* This must be a register. */
38182 if (VECTOR_MODE_P (mode))
38183 op = safe_vector_operand (op, mode);
38185 op = fixup_modeless_constant (op, mode);
38187 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38188 op = copy_to_mode_reg (mode, op);
38189 else
38191 op = copy_to_reg (op);
38192 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38197 args[i].op = op;
38198 args[i].mode = mode;
38201 switch (nargs)
38203 case 0:
38204 pat = GEN_FCN (icode) (target);
38205 break;
38206 case 1:
38207 pat = GEN_FCN (icode) (target, args[0].op);
38208 break;
38209 case 2:
38210 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38211 break;
38212 case 3:
38213 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38214 break;
38215 default:
38216 gcc_unreachable ();
38219 if (! pat)
38220 return 0;
38221 emit_insn (pat);
38222 return klass == store ? 0 : target;
38225 /* Return the integer constant in ARG. Constrain it to be in the range
38226 of the subparts of VEC_TYPE; issue an error if not. */
38228 static int
38229 get_element_number (tree vec_type, tree arg)
38231 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38233 if (!tree_fits_uhwi_p (arg)
38234 || (elt = tree_to_uhwi (arg), elt > max))
38236 error ("selector must be an integer constant in the range 0..%wi", max);
38237 return 0;
38240 return elt;
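/* For example, for a V4SF vector type the valid selectors are 0..3; a
   __builtin_ia32_vec_ext_v4sf call with a selector of 4 or more (or with a
   non-constant selector) reports the error above and element 0 is used
   instead.  */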
38243 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38244 ix86_expand_vector_init. We DO have language-level syntax for this, in
38245 the form of (type){ init-list }. Except that since we can't place emms
38246 instructions from inside the compiler, we can't allow the use of MMX
38247 registers unless the user explicitly asks for it. So we do *not* define
38248 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38249 we have builtins invoked by mmintrin.h that give us license to emit
38250 these sorts of instructions. */
38252 static rtx
38253 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38255 machine_mode tmode = TYPE_MODE (type);
38256 machine_mode inner_mode = GET_MODE_INNER (tmode);
38257 int i, n_elt = GET_MODE_NUNITS (tmode);
38258 rtvec v = rtvec_alloc (n_elt);
38260 gcc_assert (VECTOR_MODE_P (tmode));
38261 gcc_assert (call_expr_nargs (exp) == n_elt);
38263 for (i = 0; i < n_elt; ++i)
38265 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38266 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38269 if (!target || !register_operand (target, tmode))
38270 target = gen_reg_rtx (tmode);
38272 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38273 return target;
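/* For example, _mm_set_pi32 in mmintrin.h expands (roughly) to
   __builtin_ia32_vec_init_v2si, which is routed through this function rather
   than through a generic vec_init pattern, so no MMX code is generated
   unless such intrinsics are used explicitly.  */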
38276 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38277 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38278 had a language-level syntax for referencing vector elements. */
38280 static rtx
38281 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38283 machine_mode tmode, mode0;
38284 tree arg0, arg1;
38285 int elt;
38286 rtx op0;
38288 arg0 = CALL_EXPR_ARG (exp, 0);
38289 arg1 = CALL_EXPR_ARG (exp, 1);
38291 op0 = expand_normal (arg0);
38292 elt = get_element_number (TREE_TYPE (arg0), arg1);
38294 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38295 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38296 gcc_assert (VECTOR_MODE_P (mode0));
38298 op0 = force_reg (mode0, op0);
38300 if (optimize || !target || !register_operand (target, tmode))
38301 target = gen_reg_rtx (tmode);
38303 ix86_expand_vector_extract (true, target, op0, elt);
38305 return target;
38308 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38309 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38310 a language-level syntax for referencing vector elements. */
38312 static rtx
38313 ix86_expand_vec_set_builtin (tree exp)
38315 machine_mode tmode, mode1;
38316 tree arg0, arg1, arg2;
38317 int elt;
38318 rtx op0, op1, target;
38320 arg0 = CALL_EXPR_ARG (exp, 0);
38321 arg1 = CALL_EXPR_ARG (exp, 1);
38322 arg2 = CALL_EXPR_ARG (exp, 2);
38324 tmode = TYPE_MODE (TREE_TYPE (arg0));
38325 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38326 gcc_assert (VECTOR_MODE_P (tmode));
38328 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38329 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38330 elt = get_element_number (TREE_TYPE (arg0), arg2);
38332 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38333 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38335 op0 = force_reg (tmode, op0);
38336 op1 = force_reg (mode1, op1);
38338 /* OP0 is the source of these builtin functions and shouldn't be
38339 modified. Create a copy, use it and return it as target. */
38340 target = gen_reg_rtx (tmode);
38341 emit_move_insn (target, op0);
38342 ix86_expand_vector_set (true, target, op1, elt);
38344 return target;
38347 /* Emit conditional move of SRC to DST with condition
38348 OP1 CODE OP2. */
38349 static void
38350 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38352 rtx t;
38354 if (TARGET_CMOVE)
38356 t = ix86_expand_compare (code, op1, op2);
38357 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38358 src, dst)));
38360 else
38362 rtx nomove = gen_label_rtx ();
38363 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38364 const0_rtx, GET_MODE (op1), 1, nomove);
38365 emit_move_insn (dst, src);
38366 emit_label (nomove);
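/* In effect, the non-CMOV fallback above emits
     if (!(op1 CODE op2)) goto nomove;  dst = src;  nomove:;
   which leaves DST unchanged when the condition is false, matching the
   conditional move emitted on TARGET_CMOVE targets.  */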
38370 /* Choose the max of DST and SRC and put it in DST. */
38371 static void
38372 ix86_emit_move_max (rtx dst, rtx src)
38374 ix86_emit_cmove (dst, src, LTU, dst, src);
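/* This helper is used for the MPX bounds code below: because the upper
   bound is kept in one's complement form, an unsigned maximum is the right
   operation for both the lower and the (complemented) upper bound when
   narrowing or intersecting bounds.  */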
38377 /* Expand an expression EXP that calls a built-in function,
38378 with result going to TARGET if that's convenient
38379 (and in mode MODE if that's convenient).
38380 SUBTARGET may be used as the target for computing one of EXP's operands.
38381 IGNORE is nonzero if the value is to be ignored. */
38383 static rtx
38384 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38385 machine_mode mode, int ignore)
38387 const struct builtin_description *d;
38388 size_t i;
38389 enum insn_code icode;
38390 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38391 tree arg0, arg1, arg2, arg3, arg4;
38392 rtx op0, op1, op2, op3, op4, pat, insn;
38393 machine_mode mode0, mode1, mode2, mode3, mode4;
38394 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38396 /* For CPU builtins that can be folded, fold first and expand the fold. */
38397 switch (fcode)
38399 case IX86_BUILTIN_CPU_INIT:
38401 /* Make it call __cpu_indicator_init in libgcc. */
38402 tree call_expr, fndecl, type;
38403 type = build_function_type_list (integer_type_node, NULL_TREE);
38404 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38405 call_expr = build_call_expr (fndecl, 0);
38406 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38408 case IX86_BUILTIN_CPU_IS:
38409 case IX86_BUILTIN_CPU_SUPPORTS:
38411 tree arg0 = CALL_EXPR_ARG (exp, 0);
38412 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38413 gcc_assert (fold_expr != NULL_TREE);
38414 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38418 /* Determine whether the builtin function is available under the current ISA.
38419 Originally the builtin was not created if it wasn't applicable to the
38420 current ISA based on the command line switches. With function specific
38421 options, we need to check in the context of the function making the call
38422 whether it is supported. */
38423 if (ix86_builtins_isa[fcode].isa
38424 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38426 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38427 NULL, (enum fpmath_unit) 0, false);
38429 if (!opts)
38430 error ("%qE needs unknown isa option", fndecl);
38431 else
38433 gcc_assert (opts != NULL);
38434 error ("%qE needs isa option %s", fndecl, opts);
38435 free (opts);
38437 return const0_rtx;
38440 switch (fcode)
38442 case IX86_BUILTIN_BNDMK:
38443 if (!target
38444 || GET_MODE (target) != BNDmode
38445 || !register_operand (target, BNDmode))
38446 target = gen_reg_rtx (BNDmode);
38448 arg0 = CALL_EXPR_ARG (exp, 0);
38449 arg1 = CALL_EXPR_ARG (exp, 1);
38451 op0 = expand_normal (arg0);
38452 op1 = expand_normal (arg1);
38454 if (!register_operand (op0, Pmode))
38455 op0 = ix86_zero_extend_to_Pmode (op0);
38456 if (!register_operand (op1, Pmode))
38457 op1 = ix86_zero_extend_to_Pmode (op1);
38459 /* Builtin arg1 is the size of the block, but instruction op1 should
38460 be (size - 1). */
38461 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38462 NULL_RTX, 1, OPTAB_DIRECT);
38464 emit_insn (BNDmode == BND64mode
38465 ? gen_bnd64_mk (target, op0, op1)
38466 : gen_bnd32_mk (target, op0, op1));
38467 return target;
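/* The bounds created above cover [op0, op0 + size - 1]; the bndmk pattern
   takes the upper-bound offset (size - 1) rather than the size itself,
   hence the adjustment.  */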
38469 case IX86_BUILTIN_BNDSTX:
38470 arg0 = CALL_EXPR_ARG (exp, 0);
38471 arg1 = CALL_EXPR_ARG (exp, 1);
38472 arg2 = CALL_EXPR_ARG (exp, 2);
38474 op0 = expand_normal (arg0);
38475 op1 = expand_normal (arg1);
38476 op2 = expand_normal (arg2);
38478 if (!register_operand (op0, Pmode))
38479 op0 = ix86_zero_extend_to_Pmode (op0);
38480 if (!register_operand (op1, BNDmode))
38481 op1 = copy_to_mode_reg (BNDmode, op1);
38482 if (!register_operand (op2, Pmode))
38483 op2 = ix86_zero_extend_to_Pmode (op2);
38485 emit_insn (BNDmode == BND64mode
38486 ? gen_bnd64_stx (op2, op0, op1)
38487 : gen_bnd32_stx (op2, op0, op1));
38488 return 0;
38490 case IX86_BUILTIN_BNDLDX:
38491 if (!target
38492 || GET_MODE (target) != BNDmode
38493 || !register_operand (target, BNDmode))
38494 target = gen_reg_rtx (BNDmode);
38496 arg0 = CALL_EXPR_ARG (exp, 0);
38497 arg1 = CALL_EXPR_ARG (exp, 1);
38499 op0 = expand_normal (arg0);
38500 op1 = expand_normal (arg1);
38502 if (!register_operand (op0, Pmode))
38503 op0 = ix86_zero_extend_to_Pmode (op0);
38504 if (!register_operand (op1, Pmode))
38505 op1 = ix86_zero_extend_to_Pmode (op1);
38507 emit_insn (BNDmode == BND64mode
38508 ? gen_bnd64_ldx (target, op0, op1)
38509 : gen_bnd32_ldx (target, op0, op1));
38510 return target;
38512 case IX86_BUILTIN_BNDCL:
38513 arg0 = CALL_EXPR_ARG (exp, 0);
38514 arg1 = CALL_EXPR_ARG (exp, 1);
38516 op0 = expand_normal (arg0);
38517 op1 = expand_normal (arg1);
38519 if (!register_operand (op0, Pmode))
38520 op0 = ix86_zero_extend_to_Pmode (op0);
38521 if (!register_operand (op1, BNDmode))
38522 op1 = copy_to_mode_reg (BNDmode, op1);
38524 emit_insn (BNDmode == BND64mode
38525 ? gen_bnd64_cl (op1, op0)
38526 : gen_bnd32_cl (op1, op0));
38527 return 0;
38529 case IX86_BUILTIN_BNDCU:
38530 arg0 = CALL_EXPR_ARG (exp, 0);
38531 arg1 = CALL_EXPR_ARG (exp, 1);
38533 op0 = expand_normal (arg0);
38534 op1 = expand_normal (arg1);
38536 if (!register_operand (op0, Pmode))
38537 op0 = ix86_zero_extend_to_Pmode (op0);
38538 if (!register_operand (op1, BNDmode))
38539 op1 = copy_to_mode_reg (BNDmode, op1);
38541 emit_insn (BNDmode == BND64mode
38542 ? gen_bnd64_cu (op1, op0)
38543 : gen_bnd32_cu (op1, op0));
38544 return 0;
38546 case IX86_BUILTIN_BNDRET:
38547 arg0 = CALL_EXPR_ARG (exp, 0);
38548 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38549 target = chkp_get_rtl_bounds (arg0);
38551 /* If no bounds were specified for the returned value,
38552 then use INIT bounds. This usually happens when
38553 some built-in function is expanded. */
38554 if (!target)
38556 rtx t1 = gen_reg_rtx (Pmode);
38557 rtx t2 = gen_reg_rtx (Pmode);
38558 target = gen_reg_rtx (BNDmode);
38559 emit_move_insn (t1, const0_rtx);
38560 emit_move_insn (t2, constm1_rtx);
38561 emit_insn (BNDmode == BND64mode
38562 ? gen_bnd64_mk (target, t1, t2)
38563 : gen_bnd32_mk (target, t1, t2));
38566 gcc_assert (target && REG_P (target));
38567 return target;
38569 case IX86_BUILTIN_BNDNARROW:
38571 rtx m1, m1h1, m1h2, lb, ub, t1;
38573 /* Return value and lb. */
38574 arg0 = CALL_EXPR_ARG (exp, 0);
38575 /* Bounds. */
38576 arg1 = CALL_EXPR_ARG (exp, 1);
38577 /* Size. */
38578 arg2 = CALL_EXPR_ARG (exp, 2);
38580 lb = expand_normal (arg0);
38581 op1 = expand_normal (arg1);
38582 op2 = expand_normal (arg2);
38584 /* Size was passed but we need to use (size - 1) as for bndmk. */
38585 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38586 NULL_RTX, 1, OPTAB_DIRECT);
38588 /* Add LB to size and invert to get UB. */
38589 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38590 op2, 1, OPTAB_DIRECT);
38591 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38593 if (!register_operand (lb, Pmode))
38594 lb = ix86_zero_extend_to_Pmode (lb);
38595 if (!register_operand (ub, Pmode))
38596 ub = ix86_zero_extend_to_Pmode (ub);
38598 /* We need to move bounds to memory before any computations. */
38599 if (MEM_P (op1))
38600 m1 = op1;
38601 else
38603 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38604 emit_move_insn (m1, op1);
38607 /* Generate mem expression to be used for access to LB and UB. */
38608 m1h1 = adjust_address (m1, Pmode, 0);
38609 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38611 t1 = gen_reg_rtx (Pmode);
38613 /* Compute LB. */
38614 emit_move_insn (t1, m1h1);
38615 ix86_emit_move_max (t1, lb);
38616 emit_move_insn (m1h1, t1);
38618 /* Compute UB. UB is stored in 1's complement form. Therefore
38619 we also use max here. */
38620 emit_move_insn (t1, m1h2);
38621 ix86_emit_move_max (t1, ub);
38622 emit_move_insn (m1h2, t1);
38624 op2 = gen_reg_rtx (BNDmode);
38625 emit_move_insn (op2, m1);
38627 return chkp_join_splitted_slot (lb, op2);
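/* The narrowed bounds returned here are, in effect,
   [max (old_lb, lb), min (old_ub, lb + size - 1)]; the min on the upper
   bound becomes a max above because upper bounds are kept in one's
   complement form.  */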
38630 case IX86_BUILTIN_BNDINT:
38632 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38634 if (!target
38635 || GET_MODE (target) != BNDmode
38636 || !register_operand (target, BNDmode))
38637 target = gen_reg_rtx (BNDmode);
38639 arg0 = CALL_EXPR_ARG (exp, 0);
38640 arg1 = CALL_EXPR_ARG (exp, 1);
38642 op0 = expand_normal (arg0);
38643 op1 = expand_normal (arg1);
38645 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38646 rh1 = adjust_address (res, Pmode, 0);
38647 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38649 /* Put the first bounds into temporaries. */
38650 lb1 = gen_reg_rtx (Pmode);
38651 ub1 = gen_reg_rtx (Pmode);
38652 if (MEM_P (op0))
38654 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38655 emit_move_insn (ub1, adjust_address (op0, Pmode,
38656 GET_MODE_SIZE (Pmode)));
38658 else
38660 emit_move_insn (res, op0);
38661 emit_move_insn (lb1, rh1);
38662 emit_move_insn (ub1, rh2);
38665 /* Put the second bounds into temporaries. */
38666 lb2 = gen_reg_rtx (Pmode);
38667 ub2 = gen_reg_rtx (Pmode);
38668 if (MEM_P (op1))
38670 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38671 emit_move_insn (ub2, adjust_address (op1, Pmode,
38672 GET_MODE_SIZE (Pmode)));
38674 else
38676 emit_move_insn (res, op1);
38677 emit_move_insn (lb2, rh1);
38678 emit_move_insn (ub2, rh2);
38681 /* Compute LB. */
38682 ix86_emit_move_max (lb1, lb2);
38683 emit_move_insn (rh1, lb1);
38685 /* Compute UB. UB is stored in 1's complement form. Therefore
38686 we also use max here. */
38687 ix86_emit_move_max (ub1, ub2);
38688 emit_move_insn (rh2, ub1);
38690 emit_move_insn (target, res);
38692 return target;
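/* In other words, the intersection of [lb1, ub1] and [lb2, ub2] is
   [max (lb1, lb2), min (ub1, ub2)], computed with two max operations
   because the upper bounds are stored in one's complement form.  */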
38695 case IX86_BUILTIN_SIZEOF:
38697 tree name;
38698 rtx symbol;
38700 if (!target
38701 || GET_MODE (target) != Pmode
38702 || !register_operand (target, Pmode))
38703 target = gen_reg_rtx (Pmode);
38705 arg0 = CALL_EXPR_ARG (exp, 0);
38706 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38708 name = DECL_ASSEMBLER_NAME (arg0);
38709 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38711 emit_insn (Pmode == SImode
38712 ? gen_move_size_reloc_si (target, symbol)
38713 : gen_move_size_reloc_di (target, symbol));
38715 return target;
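/* The move_size_reloc pattern emits the symbol's size via a size
   relocation, so the value is resolved at link time instead of being
   computed at run time.  */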
38718 case IX86_BUILTIN_BNDLOWER:
38720 rtx mem, hmem;
38722 if (!target
38723 || GET_MODE (target) != Pmode
38724 || !register_operand (target, Pmode))
38725 target = gen_reg_rtx (Pmode);
38727 arg0 = CALL_EXPR_ARG (exp, 0);
38728 op0 = expand_normal (arg0);
38730 /* We need to move bounds to memory first. */
38731 if (MEM_P (op0))
38732 mem = op0;
38733 else
38735 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38736 emit_move_insn (mem, op0);
38739 /* Generate mem expression to access LB and load it. */
38740 hmem = adjust_address (mem, Pmode, 0);
38741 emit_move_insn (target, hmem);
38743 return target;
38746 case IX86_BUILTIN_BNDUPPER:
38748 rtx mem, hmem, res;
38750 if (!target
38751 || GET_MODE (target) != Pmode
38752 || !register_operand (target, Pmode))
38753 target = gen_reg_rtx (Pmode);
38755 arg0 = CALL_EXPR_ARG (exp, 0);
38756 op0 = expand_normal (arg0);
38758 /* We need to move bounds to memory first. */
38759 if (MEM_P (op0))
38760 mem = op0;
38761 else
38763 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38764 emit_move_insn (mem, op0);
38767 /* Generate mem expression to access UB. */
38768 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38770 /* We need to invert all bits of UB. */
38771 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38773 if (res != target)
38774 emit_move_insn (target, res);
38776 return target;
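/* Note the asymmetry with IX86_BUILTIN_BNDLOWER above: the lower bound can
   be returned as loaded, while the upper bound must have all bits inverted
   because it is stored in one's complement form.  */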
38779 case IX86_BUILTIN_MASKMOVQ:
38780 case IX86_BUILTIN_MASKMOVDQU:
38781 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38782 ? CODE_FOR_mmx_maskmovq
38783 : CODE_FOR_sse2_maskmovdqu);
38784 /* Note the arg order is different from the operand order. */
38785 arg1 = CALL_EXPR_ARG (exp, 0);
38786 arg2 = CALL_EXPR_ARG (exp, 1);
38787 arg0 = CALL_EXPR_ARG (exp, 2);
38788 op0 = expand_normal (arg0);
38789 op1 = expand_normal (arg1);
38790 op2 = expand_normal (arg2);
38791 mode0 = insn_data[icode].operand[0].mode;
38792 mode1 = insn_data[icode].operand[1].mode;
38793 mode2 = insn_data[icode].operand[2].mode;
38795 op0 = ix86_zero_extend_to_Pmode (op0);
38796 op0 = gen_rtx_MEM (mode1, op0);
38798 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38799 op0 = copy_to_mode_reg (mode0, op0);
38800 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38801 op1 = copy_to_mode_reg (mode1, op1);
38802 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38803 op2 = copy_to_mode_reg (mode2, op2);
38804 pat = GEN_FCN (icode) (op0, op1, op2);
38805 if (! pat)
38806 return 0;
38807 emit_insn (pat);
38808 return 0;
38810 case IX86_BUILTIN_LDMXCSR:
38811 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38812 target = assign_386_stack_local (SImode, SLOT_TEMP);
38813 emit_move_insn (target, op0);
38814 emit_insn (gen_sse_ldmxcsr (target));
38815 return 0;
38817 case IX86_BUILTIN_STMXCSR:
38818 target = assign_386_stack_local (SImode, SLOT_TEMP);
38819 emit_insn (gen_sse_stmxcsr (target));
38820 return copy_to_mode_reg (SImode, target);
38822 case IX86_BUILTIN_CLFLUSH:
38823 arg0 = CALL_EXPR_ARG (exp, 0);
38824 op0 = expand_normal (arg0);
38825 icode = CODE_FOR_sse2_clflush;
38826 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38827 op0 = ix86_zero_extend_to_Pmode (op0);
38829 emit_insn (gen_sse2_clflush (op0));
38830 return 0;
38832 case IX86_BUILTIN_CLWB:
38833 arg0 = CALL_EXPR_ARG (exp, 0);
38834 op0 = expand_normal (arg0);
38835 icode = CODE_FOR_clwb;
38836 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38837 op0 = ix86_zero_extend_to_Pmode (op0);
38839 emit_insn (gen_clwb (op0));
38840 return 0;
38842 case IX86_BUILTIN_CLFLUSHOPT:
38843 arg0 = CALL_EXPR_ARG (exp, 0);
38844 op0 = expand_normal (arg0);
38845 icode = CODE_FOR_clflushopt;
38846 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38847 op0 = ix86_zero_extend_to_Pmode (op0);
38849 emit_insn (gen_clflushopt (op0));
38850 return 0;
38852 case IX86_BUILTIN_MONITOR:
38853 arg0 = CALL_EXPR_ARG (exp, 0);
38854 arg1 = CALL_EXPR_ARG (exp, 1);
38855 arg2 = CALL_EXPR_ARG (exp, 2);
38856 op0 = expand_normal (arg0);
38857 op1 = expand_normal (arg1);
38858 op2 = expand_normal (arg2);
38859 if (!REG_P (op0))
38860 op0 = ix86_zero_extend_to_Pmode (op0);
38861 if (!REG_P (op1))
38862 op1 = copy_to_mode_reg (SImode, op1);
38863 if (!REG_P (op2))
38864 op2 = copy_to_mode_reg (SImode, op2);
38865 emit_insn (ix86_gen_monitor (op0, op1, op2));
38866 return 0;
38868 case IX86_BUILTIN_MWAIT:
38869 arg0 = CALL_EXPR_ARG (exp, 0);
38870 arg1 = CALL_EXPR_ARG (exp, 1);
38871 op0 = expand_normal (arg0);
38872 op1 = expand_normal (arg1);
38873 if (!REG_P (op0))
38874 op0 = copy_to_mode_reg (SImode, op0);
38875 if (!REG_P (op1))
38876 op1 = copy_to_mode_reg (SImode, op1);
38877 emit_insn (gen_sse3_mwait (op0, op1));
38878 return 0;
38880 case IX86_BUILTIN_VEC_INIT_V2SI:
38881 case IX86_BUILTIN_VEC_INIT_V4HI:
38882 case IX86_BUILTIN_VEC_INIT_V8QI:
38883 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38885 case IX86_BUILTIN_VEC_EXT_V2DF:
38886 case IX86_BUILTIN_VEC_EXT_V2DI:
38887 case IX86_BUILTIN_VEC_EXT_V4SF:
38888 case IX86_BUILTIN_VEC_EXT_V4SI:
38889 case IX86_BUILTIN_VEC_EXT_V8HI:
38890 case IX86_BUILTIN_VEC_EXT_V2SI:
38891 case IX86_BUILTIN_VEC_EXT_V4HI:
38892 case IX86_BUILTIN_VEC_EXT_V16QI:
38893 return ix86_expand_vec_ext_builtin (exp, target);
38895 case IX86_BUILTIN_VEC_SET_V2DI:
38896 case IX86_BUILTIN_VEC_SET_V4SF:
38897 case IX86_BUILTIN_VEC_SET_V4SI:
38898 case IX86_BUILTIN_VEC_SET_V8HI:
38899 case IX86_BUILTIN_VEC_SET_V4HI:
38900 case IX86_BUILTIN_VEC_SET_V16QI:
38901 return ix86_expand_vec_set_builtin (exp);
38903 case IX86_BUILTIN_INFQ:
38904 case IX86_BUILTIN_HUGE_VALQ:
38906 REAL_VALUE_TYPE inf;
38907 rtx tmp;
38909 real_inf (&inf);
38910 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38912 tmp = validize_mem (force_const_mem (mode, tmp));
38914 if (target == 0)
38915 target = gen_reg_rtx (mode);
38917 emit_move_insn (target, tmp);
38918 return target;
38921 case IX86_BUILTIN_RDPMC:
38922 case IX86_BUILTIN_RDTSC:
38923 case IX86_BUILTIN_RDTSCP:
38925 op0 = gen_reg_rtx (DImode);
38926 op1 = gen_reg_rtx (DImode);
38928 if (fcode == IX86_BUILTIN_RDPMC)
38930 arg0 = CALL_EXPR_ARG (exp, 0);
38931 op2 = expand_normal (arg0);
38932 if (!register_operand (op2, SImode))
38933 op2 = copy_to_mode_reg (SImode, op2);
38935 insn = (TARGET_64BIT
38936 ? gen_rdpmc_rex64 (op0, op1, op2)
38937 : gen_rdpmc (op0, op2));
38938 emit_insn (insn);
38940 else if (fcode == IX86_BUILTIN_RDTSC)
38942 insn = (TARGET_64BIT
38943 ? gen_rdtsc_rex64 (op0, op1)
38944 : gen_rdtsc (op0));
38945 emit_insn (insn);
38947 else
38949 op2 = gen_reg_rtx (SImode);
38951 insn = (TARGET_64BIT
38952 ? gen_rdtscp_rex64 (op0, op1, op2)
38953 : gen_rdtscp (op0, op2));
38954 emit_insn (insn);
38956 arg0 = CALL_EXPR_ARG (exp, 0);
38957 op4 = expand_normal (arg0);
38958 if (!address_operand (op4, VOIDmode))
38960 op4 = convert_memory_address (Pmode, op4);
38961 op4 = copy_addr_to_reg (op4);
38963 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
38966 if (target == 0)
38968 /* mode is VOIDmode if __builtin_rd* has been called
38969 without lhs. */
38970 if (mode == VOIDmode)
38971 return target;
38972 target = gen_reg_rtx (mode);
38975 if (TARGET_64BIT)
38977 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
38978 op1, 1, OPTAB_DIRECT);
38979 op0 = expand_simple_binop (DImode, IOR, op0, op1,
38980 op0, 1, OPTAB_DIRECT);
38983 emit_move_insn (target, op0);
38984 return target;
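/* On 64-bit targets the rdtsc/rdpmc patterns return the result split across
   two registers, so the high half is shifted and OR-ed into op0 above; on
   32-bit targets the DImode pattern already yields the full 64-bit value.
   A typical use is, roughly,
     unsigned long long t = __builtin_ia32_rdtsc ();  */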
38986 case IX86_BUILTIN_FXSAVE:
38987 case IX86_BUILTIN_FXRSTOR:
38988 case IX86_BUILTIN_FXSAVE64:
38989 case IX86_BUILTIN_FXRSTOR64:
38990 case IX86_BUILTIN_FNSTENV:
38991 case IX86_BUILTIN_FLDENV:
38992 mode0 = BLKmode;
38993 switch (fcode)
38995 case IX86_BUILTIN_FXSAVE:
38996 icode = CODE_FOR_fxsave;
38997 break;
38998 case IX86_BUILTIN_FXRSTOR:
38999 icode = CODE_FOR_fxrstor;
39000 break;
39001 case IX86_BUILTIN_FXSAVE64:
39002 icode = CODE_FOR_fxsave64;
39003 break;
39004 case IX86_BUILTIN_FXRSTOR64:
39005 icode = CODE_FOR_fxrstor64;
39006 break;
39007 case IX86_BUILTIN_FNSTENV:
39008 icode = CODE_FOR_fnstenv;
39009 break;
39010 case IX86_BUILTIN_FLDENV:
39011 icode = CODE_FOR_fldenv;
39012 break;
39013 default:
39014 gcc_unreachable ();
39017 arg0 = CALL_EXPR_ARG (exp, 0);
39018 op0 = expand_normal (arg0);
39020 if (!address_operand (op0, VOIDmode))
39022 op0 = convert_memory_address (Pmode, op0);
39023 op0 = copy_addr_to_reg (op0);
39025 op0 = gen_rtx_MEM (mode0, op0);
39027 pat = GEN_FCN (icode) (op0);
39028 if (pat)
39029 emit_insn (pat);
39030 return 0;
39032 case IX86_BUILTIN_XSAVE:
39033 case IX86_BUILTIN_XRSTOR:
39034 case IX86_BUILTIN_XSAVE64:
39035 case IX86_BUILTIN_XRSTOR64:
39036 case IX86_BUILTIN_XSAVEOPT:
39037 case IX86_BUILTIN_XSAVEOPT64:
39038 case IX86_BUILTIN_XSAVES:
39039 case IX86_BUILTIN_XRSTORS:
39040 case IX86_BUILTIN_XSAVES64:
39041 case IX86_BUILTIN_XRSTORS64:
39042 case IX86_BUILTIN_XSAVEC:
39043 case IX86_BUILTIN_XSAVEC64:
39044 arg0 = CALL_EXPR_ARG (exp, 0);
39045 arg1 = CALL_EXPR_ARG (exp, 1);
39046 op0 = expand_normal (arg0);
39047 op1 = expand_normal (arg1);
39049 if (!address_operand (op0, VOIDmode))
39051 op0 = convert_memory_address (Pmode, op0);
39052 op0 = copy_addr_to_reg (op0);
39054 op0 = gen_rtx_MEM (BLKmode, op0);
39056 op1 = force_reg (DImode, op1);
39058 if (TARGET_64BIT)
39060 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39061 NULL, 1, OPTAB_DIRECT);
39062 switch (fcode)
39064 case IX86_BUILTIN_XSAVE:
39065 icode = CODE_FOR_xsave_rex64;
39066 break;
39067 case IX86_BUILTIN_XRSTOR:
39068 icode = CODE_FOR_xrstor_rex64;
39069 break;
39070 case IX86_BUILTIN_XSAVE64:
39071 icode = CODE_FOR_xsave64;
39072 break;
39073 case IX86_BUILTIN_XRSTOR64:
39074 icode = CODE_FOR_xrstor64;
39075 break;
39076 case IX86_BUILTIN_XSAVEOPT:
39077 icode = CODE_FOR_xsaveopt_rex64;
39078 break;
39079 case IX86_BUILTIN_XSAVEOPT64:
39080 icode = CODE_FOR_xsaveopt64;
39081 break;
39082 case IX86_BUILTIN_XSAVES:
39083 icode = CODE_FOR_xsaves_rex64;
39084 break;
39085 case IX86_BUILTIN_XRSTORS:
39086 icode = CODE_FOR_xrstors_rex64;
39087 break;
39088 case IX86_BUILTIN_XSAVES64:
39089 icode = CODE_FOR_xsaves64;
39090 break;
39091 case IX86_BUILTIN_XRSTORS64:
39092 icode = CODE_FOR_xrstors64;
39093 break;
39094 case IX86_BUILTIN_XSAVEC:
39095 icode = CODE_FOR_xsavec_rex64;
39096 break;
39097 case IX86_BUILTIN_XSAVEC64:
39098 icode = CODE_FOR_xsavec64;
39099 break;
39100 default:
39101 gcc_unreachable ();
39104 op2 = gen_lowpart (SImode, op2);
39105 op1 = gen_lowpart (SImode, op1);
39106 pat = GEN_FCN (icode) (op0, op1, op2);
39108 else
39110 switch (fcode)
39112 case IX86_BUILTIN_XSAVE:
39113 icode = CODE_FOR_xsave;
39114 break;
39115 case IX86_BUILTIN_XRSTOR:
39116 icode = CODE_FOR_xrstor;
39117 break;
39118 case IX86_BUILTIN_XSAVEOPT:
39119 icode = CODE_FOR_xsaveopt;
39120 break;
39121 case IX86_BUILTIN_XSAVES:
39122 icode = CODE_FOR_xsaves;
39123 break;
39124 case IX86_BUILTIN_XRSTORS:
39125 icode = CODE_FOR_xrstors;
39126 break;
39127 case IX86_BUILTIN_XSAVEC:
39128 icode = CODE_FOR_xsavec;
39129 break;
39130 default:
39131 gcc_unreachable ();
39133 pat = GEN_FCN (icode) (op0, op1);
39136 if (pat)
39137 emit_insn (pat);
39138 return 0;
39140 case IX86_BUILTIN_LLWPCB:
39141 arg0 = CALL_EXPR_ARG (exp, 0);
39142 op0 = expand_normal (arg0);
39143 icode = CODE_FOR_lwp_llwpcb;
39144 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39145 op0 = ix86_zero_extend_to_Pmode (op0);
39146 emit_insn (gen_lwp_llwpcb (op0));
39147 return 0;
39149 case IX86_BUILTIN_SLWPCB:
39150 icode = CODE_FOR_lwp_slwpcb;
39151 if (!target
39152 || !insn_data[icode].operand[0].predicate (target, Pmode))
39153 target = gen_reg_rtx (Pmode);
39154 emit_insn (gen_lwp_slwpcb (target));
39155 return target;
39157 case IX86_BUILTIN_BEXTRI32:
39158 case IX86_BUILTIN_BEXTRI64:
39159 arg0 = CALL_EXPR_ARG (exp, 0);
39160 arg1 = CALL_EXPR_ARG (exp, 1);
39161 op0 = expand_normal (arg0);
39162 op1 = expand_normal (arg1);
39163 icode = (fcode == IX86_BUILTIN_BEXTRI32
39164 ? CODE_FOR_tbm_bextri_si
39165 : CODE_FOR_tbm_bextri_di);
39166 if (!CONST_INT_P (op1))
39168 error ("last argument must be an immediate");
39169 return const0_rtx;
39171 else
39173 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39174 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39175 op1 = GEN_INT (length);
39176 op2 = GEN_INT (lsb_index);
39177 pat = GEN_FCN (icode) (target, op0, op1, op2);
39178 if (pat)
39179 emit_insn (pat);
39180 return target;
39183 case IX86_BUILTIN_RDRAND16_STEP:
39184 icode = CODE_FOR_rdrandhi_1;
39185 mode0 = HImode;
39186 goto rdrand_step;
39188 case IX86_BUILTIN_RDRAND32_STEP:
39189 icode = CODE_FOR_rdrandsi_1;
39190 mode0 = SImode;
39191 goto rdrand_step;
39193 case IX86_BUILTIN_RDRAND64_STEP:
39194 icode = CODE_FOR_rdranddi_1;
39195 mode0 = DImode;
39197 rdrand_step:
39198 op0 = gen_reg_rtx (mode0);
39199 emit_insn (GEN_FCN (icode) (op0));
39201 arg0 = CALL_EXPR_ARG (exp, 0);
39202 op1 = expand_normal (arg0);
39203 if (!address_operand (op1, VOIDmode))
39205 op1 = convert_memory_address (Pmode, op1);
39206 op1 = copy_addr_to_reg (op1);
39208 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39210 op1 = gen_reg_rtx (SImode);
39211 emit_move_insn (op1, CONST1_RTX (SImode));
39213 /* Emit SImode conditional move. */
39214 if (mode0 == HImode)
39216 op2 = gen_reg_rtx (SImode);
39217 emit_insn (gen_zero_extendhisi2 (op2, op0));
39219 else if (mode0 == SImode)
39220 op2 = op0;
39221 else
39222 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39224 if (target == 0
39225 || !register_operand (target, SImode))
39226 target = gen_reg_rtx (SImode);
39228 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39229 const0_rtx);
39230 emit_insn (gen_rtx_SET (target,
39231 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39232 return target;
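/* The flags-based conditional move above makes the builtin return 1 when
   the carry flag reports that a random value was delivered, and otherwise
   the hardware-cleared destination value, i.e. 0; the value itself is
   stored through the pointer argument, e.g. (roughly)
     unsigned int r;
     if (__builtin_ia32_rdrand32_step (&r)) ...  */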
39234 case IX86_BUILTIN_RDSEED16_STEP:
39235 icode = CODE_FOR_rdseedhi_1;
39236 mode0 = HImode;
39237 goto rdseed_step;
39239 case IX86_BUILTIN_RDSEED32_STEP:
39240 icode = CODE_FOR_rdseedsi_1;
39241 mode0 = SImode;
39242 goto rdseed_step;
39244 case IX86_BUILTIN_RDSEED64_STEP:
39245 icode = CODE_FOR_rdseeddi_1;
39246 mode0 = DImode;
39248 rdseed_step:
39249 op0 = gen_reg_rtx (mode0);
39250 emit_insn (GEN_FCN (icode) (op0));
39252 arg0 = CALL_EXPR_ARG (exp, 0);
39253 op1 = expand_normal (arg0);
39254 if (!address_operand (op1, VOIDmode))
39256 op1 = convert_memory_address (Pmode, op1);
39257 op1 = copy_addr_to_reg (op1);
39259 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39261 op2 = gen_reg_rtx (QImode);
39263 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39264 const0_rtx);
39265 emit_insn (gen_rtx_SET (op2, pat));
39267 if (target == 0
39268 || !register_operand (target, SImode))
39269 target = gen_reg_rtx (SImode);
39271 emit_insn (gen_zero_extendqisi2 (target, op2));
39272 return target;
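/* Unlike the rdrand case above, the status here is taken directly from the
   carry flag via setcc and zero-extended, so the builtin returns exactly
   0 or 1.  */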
39274 case IX86_BUILTIN_SBB32:
39275 icode = CODE_FOR_subsi3_carry;
39276 mode0 = SImode;
39277 goto addcarryx;
39279 case IX86_BUILTIN_SBB64:
39280 icode = CODE_FOR_subdi3_carry;
39281 mode0 = DImode;
39282 goto addcarryx;
39284 case IX86_BUILTIN_ADDCARRYX32:
39285 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39286 mode0 = SImode;
39287 goto addcarryx;
39289 case IX86_BUILTIN_ADDCARRYX64:
39290 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39291 mode0 = DImode;
39293 addcarryx:
39294 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39295 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39296 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39297 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39299 op0 = gen_reg_rtx (QImode);
39301 /* Generate CF from input operand. */
39302 op1 = expand_normal (arg0);
39303 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39304 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39306 /* Gen ADCX instruction to compute X+Y+CF. */
39307 op2 = expand_normal (arg1);
39308 op3 = expand_normal (arg2);
39310 if (!REG_P (op2))
39311 op2 = copy_to_mode_reg (mode0, op2);
39312 if (!REG_P (op3))
39313 op3 = copy_to_mode_reg (mode0, op3);
39315 op0 = gen_reg_rtx (mode0);
39317 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39318 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39319 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39321 /* Store the result. */
39322 op4 = expand_normal (arg3);
39323 if (!address_operand (op4, VOIDmode))
39325 op4 = convert_memory_address (Pmode, op4);
39326 op4 = copy_addr_to_reg (op4);
39328 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39330 /* Return current CF value. */
39331 if (target == 0)
39332 target = gen_reg_rtx (QImode);
39334 PUT_MODE (pat, QImode);
39335 emit_insn (gen_rtx_SET (target, pat));
39336 return target;
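/* Roughly: *sum_out = src1 + src2 + c_in, with the carry out returned as
   the value of the builtin (cf. _addcarryx_u32 in adxintrin.h).  The
   gen_addqi3_cc above recreates CF from the incoming c_in byte by adding
   -1 to it, which sets the carry exactly when c_in is nonzero.  */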
39338 case IX86_BUILTIN_READ_FLAGS:
39339 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39341 if (optimize
39342 || target == NULL_RTX
39343 || !nonimmediate_operand (target, word_mode)
39344 || GET_MODE (target) != word_mode)
39345 target = gen_reg_rtx (word_mode);
39347 emit_insn (gen_pop (target));
39348 return target;
39350 case IX86_BUILTIN_WRITE_FLAGS:
39352 arg0 = CALL_EXPR_ARG (exp, 0);
39353 op0 = expand_normal (arg0);
39354 if (!general_no_elim_operand (op0, word_mode))
39355 op0 = copy_to_mode_reg (word_mode, op0);
39357 emit_insn (gen_push (op0));
39358 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39359 return 0;
39361 case IX86_BUILTIN_KORTESTC16:
39362 icode = CODE_FOR_kortestchi;
39363 mode0 = HImode;
39364 mode1 = CCCmode;
39365 goto kortest;
39367 case IX86_BUILTIN_KORTESTZ16:
39368 icode = CODE_FOR_kortestzhi;
39369 mode0 = HImode;
39370 mode1 = CCZmode;
39372 kortest:
39373 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39374 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39375 op0 = expand_normal (arg0);
39376 op1 = expand_normal (arg1);
39378 op0 = copy_to_reg (op0);
39379 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39380 op1 = copy_to_reg (op1);
39381 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39383 target = gen_reg_rtx (QImode);
39384 emit_insn (gen_rtx_SET (target, const0_rtx));
39386 /* Emit kortest. */
39387 emit_insn (GEN_FCN (icode) (op0, op1));
39388 /* And use setcc to return result from flags. */
39389 ix86_expand_setcc (target, EQ,
39390 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39391 return target;
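/* KORTEST ORs the two mask registers and sets ZF when the result is all
   zeros and CF when it is all ones; the kortestz/kortestc builtins above
   return the corresponding flag as 0 or 1.  */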
39393 case IX86_BUILTIN_GATHERSIV2DF:
39394 icode = CODE_FOR_avx2_gathersiv2df;
39395 goto gather_gen;
39396 case IX86_BUILTIN_GATHERSIV4DF:
39397 icode = CODE_FOR_avx2_gathersiv4df;
39398 goto gather_gen;
39399 case IX86_BUILTIN_GATHERDIV2DF:
39400 icode = CODE_FOR_avx2_gatherdiv2df;
39401 goto gather_gen;
39402 case IX86_BUILTIN_GATHERDIV4DF:
39403 icode = CODE_FOR_avx2_gatherdiv4df;
39404 goto gather_gen;
39405 case IX86_BUILTIN_GATHERSIV4SF:
39406 icode = CODE_FOR_avx2_gathersiv4sf;
39407 goto gather_gen;
39408 case IX86_BUILTIN_GATHERSIV8SF:
39409 icode = CODE_FOR_avx2_gathersiv8sf;
39410 goto gather_gen;
39411 case IX86_BUILTIN_GATHERDIV4SF:
39412 icode = CODE_FOR_avx2_gatherdiv4sf;
39413 goto gather_gen;
39414 case IX86_BUILTIN_GATHERDIV8SF:
39415 icode = CODE_FOR_avx2_gatherdiv8sf;
39416 goto gather_gen;
39417 case IX86_BUILTIN_GATHERSIV2DI:
39418 icode = CODE_FOR_avx2_gathersiv2di;
39419 goto gather_gen;
39420 case IX86_BUILTIN_GATHERSIV4DI:
39421 icode = CODE_FOR_avx2_gathersiv4di;
39422 goto gather_gen;
39423 case IX86_BUILTIN_GATHERDIV2DI:
39424 icode = CODE_FOR_avx2_gatherdiv2di;
39425 goto gather_gen;
39426 case IX86_BUILTIN_GATHERDIV4DI:
39427 icode = CODE_FOR_avx2_gatherdiv4di;
39428 goto gather_gen;
39429 case IX86_BUILTIN_GATHERSIV4SI:
39430 icode = CODE_FOR_avx2_gathersiv4si;
39431 goto gather_gen;
39432 case IX86_BUILTIN_GATHERSIV8SI:
39433 icode = CODE_FOR_avx2_gathersiv8si;
39434 goto gather_gen;
39435 case IX86_BUILTIN_GATHERDIV4SI:
39436 icode = CODE_FOR_avx2_gatherdiv4si;
39437 goto gather_gen;
39438 case IX86_BUILTIN_GATHERDIV8SI:
39439 icode = CODE_FOR_avx2_gatherdiv8si;
39440 goto gather_gen;
39441 case IX86_BUILTIN_GATHERALTSIV4DF:
39442 icode = CODE_FOR_avx2_gathersiv4df;
39443 goto gather_gen;
39444 case IX86_BUILTIN_GATHERALTDIV8SF:
39445 icode = CODE_FOR_avx2_gatherdiv8sf;
39446 goto gather_gen;
39447 case IX86_BUILTIN_GATHERALTSIV4DI:
39448 icode = CODE_FOR_avx2_gathersiv4di;
39449 goto gather_gen;
39450 case IX86_BUILTIN_GATHERALTDIV8SI:
39451 icode = CODE_FOR_avx2_gatherdiv8si;
39452 goto gather_gen;
39453 case IX86_BUILTIN_GATHER3SIV16SF:
39454 icode = CODE_FOR_avx512f_gathersiv16sf;
39455 goto gather_gen;
39456 case IX86_BUILTIN_GATHER3SIV8DF:
39457 icode = CODE_FOR_avx512f_gathersiv8df;
39458 goto gather_gen;
39459 case IX86_BUILTIN_GATHER3DIV16SF:
39460 icode = CODE_FOR_avx512f_gatherdiv16sf;
39461 goto gather_gen;
39462 case IX86_BUILTIN_GATHER3DIV8DF:
39463 icode = CODE_FOR_avx512f_gatherdiv8df;
39464 goto gather_gen;
39465 case IX86_BUILTIN_GATHER3SIV16SI:
39466 icode = CODE_FOR_avx512f_gathersiv16si;
39467 goto gather_gen;
39468 case IX86_BUILTIN_GATHER3SIV8DI:
39469 icode = CODE_FOR_avx512f_gathersiv8di;
39470 goto gather_gen;
39471 case IX86_BUILTIN_GATHER3DIV16SI:
39472 icode = CODE_FOR_avx512f_gatherdiv16si;
39473 goto gather_gen;
39474 case IX86_BUILTIN_GATHER3DIV8DI:
39475 icode = CODE_FOR_avx512f_gatherdiv8di;
39476 goto gather_gen;
39477 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39478 icode = CODE_FOR_avx512f_gathersiv8df;
39479 goto gather_gen;
39480 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39481 icode = CODE_FOR_avx512f_gatherdiv16sf;
39482 goto gather_gen;
39483 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39484 icode = CODE_FOR_avx512f_gathersiv8di;
39485 goto gather_gen;
39486 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39487 icode = CODE_FOR_avx512f_gatherdiv16si;
39488 goto gather_gen;
39489 case IX86_BUILTIN_GATHER3SIV2DF:
39490 icode = CODE_FOR_avx512vl_gathersiv2df;
39491 goto gather_gen;
39492 case IX86_BUILTIN_GATHER3SIV4DF:
39493 icode = CODE_FOR_avx512vl_gathersiv4df;
39494 goto gather_gen;
39495 case IX86_BUILTIN_GATHER3DIV2DF:
39496 icode = CODE_FOR_avx512vl_gatherdiv2df;
39497 goto gather_gen;
39498 case IX86_BUILTIN_GATHER3DIV4DF:
39499 icode = CODE_FOR_avx512vl_gatherdiv4df;
39500 goto gather_gen;
39501 case IX86_BUILTIN_GATHER3SIV4SF:
39502 icode = CODE_FOR_avx512vl_gathersiv4sf;
39503 goto gather_gen;
39504 case IX86_BUILTIN_GATHER3SIV8SF:
39505 icode = CODE_FOR_avx512vl_gathersiv8sf;
39506 goto gather_gen;
39507 case IX86_BUILTIN_GATHER3DIV4SF:
39508 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39509 goto gather_gen;
39510 case IX86_BUILTIN_GATHER3DIV8SF:
39511 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39512 goto gather_gen;
39513 case IX86_BUILTIN_GATHER3SIV2DI:
39514 icode = CODE_FOR_avx512vl_gathersiv2di;
39515 goto gather_gen;
39516 case IX86_BUILTIN_GATHER3SIV4DI:
39517 icode = CODE_FOR_avx512vl_gathersiv4di;
39518 goto gather_gen;
39519 case IX86_BUILTIN_GATHER3DIV2DI:
39520 icode = CODE_FOR_avx512vl_gatherdiv2di;
39521 goto gather_gen;
39522 case IX86_BUILTIN_GATHER3DIV4DI:
39523 icode = CODE_FOR_avx512vl_gatherdiv4di;
39524 goto gather_gen;
39525 case IX86_BUILTIN_GATHER3SIV4SI:
39526 icode = CODE_FOR_avx512vl_gathersiv4si;
39527 goto gather_gen;
39528 case IX86_BUILTIN_GATHER3SIV8SI:
39529 icode = CODE_FOR_avx512vl_gathersiv8si;
39530 goto gather_gen;
39531 case IX86_BUILTIN_GATHER3DIV4SI:
39532 icode = CODE_FOR_avx512vl_gatherdiv4si;
39533 goto gather_gen;
39534 case IX86_BUILTIN_GATHER3DIV8SI:
39535 icode = CODE_FOR_avx512vl_gatherdiv8si;
39536 goto gather_gen;
39537 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39538 icode = CODE_FOR_avx512vl_gathersiv4df;
39539 goto gather_gen;
39540 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39541 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39542 goto gather_gen;
39543 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39544 icode = CODE_FOR_avx512vl_gathersiv4di;
39545 goto gather_gen;
39546 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39547 icode = CODE_FOR_avx512vl_gatherdiv8si;
39548 goto gather_gen;
39549 case IX86_BUILTIN_SCATTERSIV16SF:
39550 icode = CODE_FOR_avx512f_scattersiv16sf;
39551 goto scatter_gen;
39552 case IX86_BUILTIN_SCATTERSIV8DF:
39553 icode = CODE_FOR_avx512f_scattersiv8df;
39554 goto scatter_gen;
39555 case IX86_BUILTIN_SCATTERDIV16SF:
39556 icode = CODE_FOR_avx512f_scatterdiv16sf;
39557 goto scatter_gen;
39558 case IX86_BUILTIN_SCATTERDIV8DF:
39559 icode = CODE_FOR_avx512f_scatterdiv8df;
39560 goto scatter_gen;
39561 case IX86_BUILTIN_SCATTERSIV16SI:
39562 icode = CODE_FOR_avx512f_scattersiv16si;
39563 goto scatter_gen;
39564 case IX86_BUILTIN_SCATTERSIV8DI:
39565 icode = CODE_FOR_avx512f_scattersiv8di;
39566 goto scatter_gen;
39567 case IX86_BUILTIN_SCATTERDIV16SI:
39568 icode = CODE_FOR_avx512f_scatterdiv16si;
39569 goto scatter_gen;
39570 case IX86_BUILTIN_SCATTERDIV8DI:
39571 icode = CODE_FOR_avx512f_scatterdiv8di;
39572 goto scatter_gen;
39573 case IX86_BUILTIN_SCATTERSIV8SF:
39574 icode = CODE_FOR_avx512vl_scattersiv8sf;
39575 goto scatter_gen;
39576 case IX86_BUILTIN_SCATTERSIV4SF:
39577 icode = CODE_FOR_avx512vl_scattersiv4sf;
39578 goto scatter_gen;
39579 case IX86_BUILTIN_SCATTERSIV4DF:
39580 icode = CODE_FOR_avx512vl_scattersiv4df;
39581 goto scatter_gen;
39582 case IX86_BUILTIN_SCATTERSIV2DF:
39583 icode = CODE_FOR_avx512vl_scattersiv2df;
39584 goto scatter_gen;
39585 case IX86_BUILTIN_SCATTERDIV8SF:
39586 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39587 goto scatter_gen;
39588 case IX86_BUILTIN_SCATTERDIV4SF:
39589 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39590 goto scatter_gen;
39591 case IX86_BUILTIN_SCATTERDIV4DF:
39592 icode = CODE_FOR_avx512vl_scatterdiv4df;
39593 goto scatter_gen;
39594 case IX86_BUILTIN_SCATTERDIV2DF:
39595 icode = CODE_FOR_avx512vl_scatterdiv2df;
39596 goto scatter_gen;
39597 case IX86_BUILTIN_SCATTERSIV8SI:
39598 icode = CODE_FOR_avx512vl_scattersiv8si;
39599 goto scatter_gen;
39600 case IX86_BUILTIN_SCATTERSIV4SI:
39601 icode = CODE_FOR_avx512vl_scattersiv4si;
39602 goto scatter_gen;
39603 case IX86_BUILTIN_SCATTERSIV4DI:
39604 icode = CODE_FOR_avx512vl_scattersiv4di;
39605 goto scatter_gen;
39606 case IX86_BUILTIN_SCATTERSIV2DI:
39607 icode = CODE_FOR_avx512vl_scattersiv2di;
39608 goto scatter_gen;
39609 case IX86_BUILTIN_SCATTERDIV8SI:
39610 icode = CODE_FOR_avx512vl_scatterdiv8si;
39611 goto scatter_gen;
39612 case IX86_BUILTIN_SCATTERDIV4SI:
39613 icode = CODE_FOR_avx512vl_scatterdiv4si;
39614 goto scatter_gen;
39615 case IX86_BUILTIN_SCATTERDIV4DI:
39616 icode = CODE_FOR_avx512vl_scatterdiv4di;
39617 goto scatter_gen;
39618 case IX86_BUILTIN_SCATTERDIV2DI:
39619 icode = CODE_FOR_avx512vl_scatterdiv2di;
39620 goto scatter_gen;
39621 case IX86_BUILTIN_GATHERPFDPD:
39622 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39623 goto vec_prefetch_gen;
39624 case IX86_BUILTIN_GATHERPFDPS:
39625 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39626 goto vec_prefetch_gen;
39627 case IX86_BUILTIN_GATHERPFQPD:
39628 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39629 goto vec_prefetch_gen;
39630 case IX86_BUILTIN_GATHERPFQPS:
39631 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39632 goto vec_prefetch_gen;
39633 case IX86_BUILTIN_SCATTERPFDPD:
39634 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39635 goto vec_prefetch_gen;
39636 case IX86_BUILTIN_SCATTERPFDPS:
39637 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39638 goto vec_prefetch_gen;
39639 case IX86_BUILTIN_SCATTERPFQPD:
39640 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39641 goto vec_prefetch_gen;
39642 case IX86_BUILTIN_SCATTERPFQPS:
39643 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39644 goto vec_prefetch_gen;
39646 gather_gen:
39647 rtx half;
39648 rtx (*gen) (rtx, rtx);
39650 arg0 = CALL_EXPR_ARG (exp, 0);
39651 arg1 = CALL_EXPR_ARG (exp, 1);
39652 arg2 = CALL_EXPR_ARG (exp, 2);
39653 arg3 = CALL_EXPR_ARG (exp, 3);
39654 arg4 = CALL_EXPR_ARG (exp, 4);
39655 op0 = expand_normal (arg0);
39656 op1 = expand_normal (arg1);
39657 op2 = expand_normal (arg2);
39658 op3 = expand_normal (arg3);
39659 op4 = expand_normal (arg4);
39660 /* Note the arg order is different from the operand order. */
39661 mode0 = insn_data[icode].operand[1].mode;
39662 mode2 = insn_data[icode].operand[3].mode;
39663 mode3 = insn_data[icode].operand[4].mode;
39664 mode4 = insn_data[icode].operand[5].mode;
39666 if (target == NULL_RTX
39667 || GET_MODE (target) != insn_data[icode].operand[0].mode
39668 || !insn_data[icode].operand[0].predicate (target,
39669 GET_MODE (target)))
39670 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39671 else
39672 subtarget = target;
39674 switch (fcode)
39676 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39677 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39678 half = gen_reg_rtx (V8SImode);
39679 if (!nonimmediate_operand (op2, V16SImode))
39680 op2 = copy_to_mode_reg (V16SImode, op2);
39681 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39682 op2 = half;
39683 break;
39684 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39685 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39686 case IX86_BUILTIN_GATHERALTSIV4DF:
39687 case IX86_BUILTIN_GATHERALTSIV4DI:
39688 half = gen_reg_rtx (V4SImode);
39689 if (!nonimmediate_operand (op2, V8SImode))
39690 op2 = copy_to_mode_reg (V8SImode, op2);
39691 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39692 op2 = half;
39693 break;
39694 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39695 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39696 half = gen_reg_rtx (mode0);
39697 if (mode0 == V8SFmode)
39698 gen = gen_vec_extract_lo_v16sf;
39699 else
39700 gen = gen_vec_extract_lo_v16si;
39701 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39702 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39703 emit_insn (gen (half, op0));
39704 op0 = half;
39705 if (GET_MODE (op3) != VOIDmode)
39707 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39708 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39709 emit_insn (gen (half, op3));
39710 op3 = half;
39712 break;
39713 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39714 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39715 case IX86_BUILTIN_GATHERALTDIV8SF:
39716 case IX86_BUILTIN_GATHERALTDIV8SI:
39717 half = gen_reg_rtx (mode0);
39718 if (mode0 == V4SFmode)
39719 gen = gen_vec_extract_lo_v8sf;
39720 else
39721 gen = gen_vec_extract_lo_v8si;
39722 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39723 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39724 emit_insn (gen (half, op0));
39725 op0 = half;
39726 if (GET_MODE (op3) != VOIDmode)
39728 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39729 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39730 emit_insn (gen (half, op3));
39731 op3 = half;
39733 break;
39734 default:
39735 break;
39738 /* Force memory operand only with base register here. But we
39739 don't want to do it on memory operand for other builtin
39740 functions. */
39741 op1 = ix86_zero_extend_to_Pmode (op1);
39743 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39744 op0 = copy_to_mode_reg (mode0, op0);
39745 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39746 op1 = copy_to_mode_reg (Pmode, op1);
39747 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39748 op2 = copy_to_mode_reg (mode2, op2);
39750 op3 = fixup_modeless_constant (op3, mode3);
39752 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39754 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39755 op3 = copy_to_mode_reg (mode3, op3);
39757 else
39759 op3 = copy_to_reg (op3);
39760 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39762 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39764 error ("the last argument must be scale 1, 2, 4, 8");
39765 return const0_rtx;
39768 /* Optimize. If mask is known to have all high bits set,
39769 replace op0 with pc_rtx to signal that the instruction
39770 overwrites the whole destination and doesn't use its
39771 previous contents. */
39772 if (optimize)
39774 if (TREE_CODE (arg3) == INTEGER_CST)
39776 if (integer_all_onesp (arg3))
39777 op0 = pc_rtx;
39779 else if (TREE_CODE (arg3) == VECTOR_CST)
39781 unsigned int negative = 0;
39782 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39784 tree cst = VECTOR_CST_ELT (arg3, i);
39785 if (TREE_CODE (cst) == INTEGER_CST
39786 && tree_int_cst_sign_bit (cst))
39787 negative++;
39788 else if (TREE_CODE (cst) == REAL_CST
39789 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39790 negative++;
39792 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39793 op0 = pc_rtx;
39795 else if (TREE_CODE (arg3) == SSA_NAME
39796 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39798 /* Also recognize when the mask is like:
39799 __v2df src = _mm_setzero_pd ();
39800 __v2df mask = _mm_cmpeq_pd (src, src);
39802 __v8sf src = _mm256_setzero_ps ();
39803 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39804 as that is a cheaper way to load all ones into
39805 a register than having to load a constant from
39806 memory. */
39807 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39808 if (is_gimple_call (def_stmt))
39810 tree fndecl = gimple_call_fndecl (def_stmt);
39811 if (fndecl
39812 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39813 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39815 case IX86_BUILTIN_CMPPD:
39816 case IX86_BUILTIN_CMPPS:
39817 case IX86_BUILTIN_CMPPD256:
39818 case IX86_BUILTIN_CMPPS256:
39819 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39820 break;
39821 /* FALLTHRU */
39822 case IX86_BUILTIN_CMPEQPD:
39823 case IX86_BUILTIN_CMPEQPS:
39824 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39825 && initializer_zerop (gimple_call_arg (def_stmt,
39826 1)))
39827 op0 = pc_rtx;
39828 break;
39829 default:
39830 break;
39836 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39837 if (! pat)
39838 return const0_rtx;
39839 emit_insn (pat);
39841 switch (fcode)
39843 case IX86_BUILTIN_GATHER3DIV16SF:
39844 if (target == NULL_RTX)
39845 target = gen_reg_rtx (V8SFmode);
39846 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39847 break;
39848 case IX86_BUILTIN_GATHER3DIV16SI:
39849 if (target == NULL_RTX)
39850 target = gen_reg_rtx (V8SImode);
39851 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39852 break;
39853 case IX86_BUILTIN_GATHER3DIV8SF:
39854 case IX86_BUILTIN_GATHERDIV8SF:
39855 if (target == NULL_RTX)
39856 target = gen_reg_rtx (V4SFmode);
39857 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39858 break;
39859 case IX86_BUILTIN_GATHER3DIV8SI:
39860 case IX86_BUILTIN_GATHERDIV8SI:
39861 if (target == NULL_RTX)
39862 target = gen_reg_rtx (V4SImode);
39863 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39864 break;
39865 default:
39866 target = subtarget;
39867 break;
39869 return target;
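/* For the DIV*SF/DIV*SI variants handled just above, the machine pattern's
   destination is a full-width vector of which only the low half contains
   gathered elements, so that half is extracted from SUBTARGET into TARGET;
   all other gathers return SUBTARGET as is.  */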
39871 scatter_gen:
39872 arg0 = CALL_EXPR_ARG (exp, 0);
39873 arg1 = CALL_EXPR_ARG (exp, 1);
39874 arg2 = CALL_EXPR_ARG (exp, 2);
39875 arg3 = CALL_EXPR_ARG (exp, 3);
39876 arg4 = CALL_EXPR_ARG (exp, 4);
39877 op0 = expand_normal (arg0);
39878 op1 = expand_normal (arg1);
39879 op2 = expand_normal (arg2);
39880 op3 = expand_normal (arg3);
39881 op4 = expand_normal (arg4);
39882 mode1 = insn_data[icode].operand[1].mode;
39883 mode2 = insn_data[icode].operand[2].mode;
39884 mode3 = insn_data[icode].operand[3].mode;
39885 mode4 = insn_data[icode].operand[4].mode;
39887 /* Force memory operand only with base register here. But we
39888 don't want to do it on memory operand for other builtin
39889 functions. */
39890 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39892 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39893 op0 = copy_to_mode_reg (Pmode, op0);
39895 op1 = fixup_modeless_constant (op1, mode1);
39897 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39899 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39900 op1 = copy_to_mode_reg (mode1, op1);
39902 else
39904 op1 = copy_to_reg (op1);
39905 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39908 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39909 op2 = copy_to_mode_reg (mode2, op2);
39911 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39912 op3 = copy_to_mode_reg (mode3, op3);
39914 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39916 error ("the last argument must be scale 1, 2, 4, 8");
39917 return const0_rtx;
39920 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39921 if (! pat)
39922 return const0_rtx;
39924 emit_insn (pat);
39925 return 0;
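/* Scatter builtins only store to memory, so 0 is returned here; the mask
   argument gets the same modeless-constant fixup that is applied to the
   gather mask above.  */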
39927 vec_prefetch_gen:
39928 arg0 = CALL_EXPR_ARG (exp, 0);
39929 arg1 = CALL_EXPR_ARG (exp, 1);
39930 arg2 = CALL_EXPR_ARG (exp, 2);
39931 arg3 = CALL_EXPR_ARG (exp, 3);
39932 arg4 = CALL_EXPR_ARG (exp, 4);
39933 op0 = expand_normal (arg0);
39934 op1 = expand_normal (arg1);
39935 op2 = expand_normal (arg2);
39936 op3 = expand_normal (arg3);
39937 op4 = expand_normal (arg4);
39938 mode0 = insn_data[icode].operand[0].mode;
39939 mode1 = insn_data[icode].operand[1].mode;
39940 mode3 = insn_data[icode].operand[3].mode;
39941 mode4 = insn_data[icode].operand[4].mode;
39943 op0 = fixup_modeless_constant (op0, mode0);
39945 if (GET_MODE (op0) == mode0
39946 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
39948 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39949 op0 = copy_to_mode_reg (mode0, op0);
39951 else if (op0 != constm1_rtx)
39953 op0 = copy_to_reg (op0);
39954 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39957 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39958 op1 = copy_to_mode_reg (mode1, op1);
39960 /* Force memory operand only with base register here. But we
39961 don't want to do it on memory operand for other builtin
39962 functions. */
39963 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
39965 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
39966 op2 = copy_to_mode_reg (Pmode, op2);
39968 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39970 error ("the fourth argument must be scale 1, 2, 4, 8");
39971 return const0_rtx;
39974 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39976 error ("incorrect hint operand");
39977 return const0_rtx;
39980 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39981 if (! pat)
39982 return const0_rtx;
39984 emit_insn (pat);
39986 return 0;
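/* The gather/scatter prefetch builtins likewise produce no value.  Note
   that constm1_rtx (an all-ones mask) is accepted directly above without
   being copied into a mask register.  */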
39988 case IX86_BUILTIN_XABORT:
39989 icode = CODE_FOR_xabort;
39990 arg0 = CALL_EXPR_ARG (exp, 0);
39991 op0 = expand_normal (arg0);
39992 mode0 = insn_data[icode].operand[0].mode;
39993 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39995 error ("the argument to xabort must be an 8-bit immediate");
39996 return const0_rtx;
39998 emit_insn (gen_xabort (op0));
39999 return 0;
40001 default:
40002 break;
40005 for (i = 0, d = bdesc_special_args;
40006 i < ARRAY_SIZE (bdesc_special_args);
40007 i++, d++)
40008 if (d->code == fcode)
40009 return ix86_expand_special_args_builtin (d, exp, target);
40011 for (i = 0, d = bdesc_args;
40012 i < ARRAY_SIZE (bdesc_args);
40013 i++, d++)
40014 if (d->code == fcode)
40015 switch (fcode)
40017 case IX86_BUILTIN_FABSQ:
40018 case IX86_BUILTIN_COPYSIGNQ:
40019 if (!TARGET_SSE)
40020 /* Emit a normal call if SSE isn't available. */
40021 return expand_call (exp, target, ignore);
40022 default:
40023 return ix86_expand_args_builtin (d, exp, target);
40026 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40027 if (d->code == fcode)
40028 return ix86_expand_sse_comi (d, exp, target);
40030 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40031 if (d->code == fcode)
40032 return ix86_expand_round_builtin (d, exp, target);
40034 for (i = 0, d = bdesc_pcmpestr;
40035 i < ARRAY_SIZE (bdesc_pcmpestr);
40036 i++, d++)
40037 if (d->code == fcode)
40038 return ix86_expand_sse_pcmpestr (d, exp, target);
40040 for (i = 0, d = bdesc_pcmpistr;
40041 i < ARRAY_SIZE (bdesc_pcmpistr);
40042 i++, d++)
40043 if (d->code == fcode)
40044 return ix86_expand_sse_pcmpistr (d, exp, target);
40046 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40047 if (d->code == fcode)
40048 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40049 (enum ix86_builtin_func_type)
40050 d->flag, d->comparison);
40052 gcc_unreachable ();
40055 /* This returns the target-specific builtin with code CODE if
40056 current_function_decl has visibility on this builtin, which is checked
40057 using isa flags. Returns NULL_TREE otherwise. */
40059 static tree ix86_get_builtin (enum ix86_builtins code)
40061 struct cl_target_option *opts;
40062 tree target_tree = NULL_TREE;
40064 /* Determine the isa flags of current_function_decl. */
40066 if (current_function_decl)
40067 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40069 if (target_tree == NULL)
40070 target_tree = target_option_default_node;
40072 opts = TREE_TARGET_OPTION (target_tree);
40074 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40075 return ix86_builtin_decl (code, true);
40076 else
40077 return NULL_TREE;
40080 /* Return the function decl for the target-specific builtin
40081 corresponding to the MPX builtin passed in FCODE. */
40082 static tree
40083 ix86_builtin_mpx_function (unsigned fcode)
40085 switch (fcode)
40087 case BUILT_IN_CHKP_BNDMK:
40088 return ix86_builtins[IX86_BUILTIN_BNDMK];
40090 case BUILT_IN_CHKP_BNDSTX:
40091 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40093 case BUILT_IN_CHKP_BNDLDX:
40094 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40096 case BUILT_IN_CHKP_BNDCL:
40097 return ix86_builtins[IX86_BUILTIN_BNDCL];
40099 case BUILT_IN_CHKP_BNDCU:
40100 return ix86_builtins[IX86_BUILTIN_BNDCU];
40102 case BUILT_IN_CHKP_BNDRET:
40103 return ix86_builtins[IX86_BUILTIN_BNDRET];
40105 case BUILT_IN_CHKP_INTERSECT:
40106 return ix86_builtins[IX86_BUILTIN_BNDINT];
40108 case BUILT_IN_CHKP_NARROW:
40109 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40111 case BUILT_IN_CHKP_SIZEOF:
40112 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40114 case BUILT_IN_CHKP_EXTRACT_LOWER:
40115 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40117 case BUILT_IN_CHKP_EXTRACT_UPPER:
40118 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40120 default:
40121 return NULL_TREE;
40124 gcc_unreachable ();
40127 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40129 Return an address to be used to load/store bounds for pointer
40130 passed in SLOT.
40132 SLOT_NO is an integer constant holding number of a target
40133 dependent special slot to be used in case SLOT is not a memory.
40135 SPECIAL_BASE is a pointer to be used as a base of fake address
40136 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40137 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40139 static rtx
40140 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40142 rtx addr = NULL;
40144 /* A NULL slot means we pass bounds for a pointer not passed to the
40145 function at all. A register slot means we pass the pointer in a
40146 register. In both these cases bounds are passed via the Bounds
40147 Table. Since we do not have an actual pointer stored in memory,
40148 we have to use fake addresses to access the Bounds Table. We
40149 start with (special_base - sizeof (void*)) and decrease this
40150 address by the pointer size to get addresses for other slots. */
40151 if (!slot || REG_P (slot))
40153 gcc_assert (CONST_INT_P (slot_no));
40154 addr = plus_constant (Pmode, special_base,
40155 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
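/* For example, with 64-bit pointers slot 0 resolves to SPECIAL_BASE-8 and
slot 1 to SPECIAL_BASE-16, i.e. the SPECIAL_BASE[-1], SPECIAL_BASE[-2]
scheme described above. */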
40157 /* If pointer is passed in a memory then its address is used to
40158 access Bounds Table. */
40159 else if (MEM_P (slot))
40161 addr = XEXP (slot, 0);
40162 if (!register_operand (addr, Pmode))
40163 addr = copy_addr_to_reg (addr);
40165 else
40166 gcc_unreachable ();
40168 return addr;
40171 /* Expand pass uses this hook to load bounds for function parameter
40172 PTR passed in SLOT in case its bounds are not passed in a register.
40174 If SLOT is a memory, then bounds are loaded as for regular pointer
40175 loaded from memory. PTR may be NULL in case SLOT is a memory.
40176 In that case the value of PTR (if required) may be loaded from SLOT.
40178 If SLOT is NULL or a register then SLOT_NO is an integer constant
40179 holding number of the target dependent special slot which should be
40180 used to obtain bounds.
40182 Return loaded bounds. */
40184 static rtx
40185 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40187 rtx reg = gen_reg_rtx (BNDmode);
40188 rtx addr;
40190 /* Get address to be used to access Bounds Table. Special slots start
40191 at the location of return address of the current function. */
40192 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40194 /* Load pointer value from a memory if we don't have it. */
40195 if (!ptr)
40197 gcc_assert (MEM_P (slot));
40198 ptr = copy_addr_to_reg (slot);
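/* bnd{64,32}ldx loads the bounds recorded for the pointer value PTR from
the Bounds Table entry selected by ADDR. */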
40201 emit_insn (BNDmode == BND64mode
40202 ? gen_bnd64_ldx (reg, addr, ptr)
40203 : gen_bnd32_ldx (reg, addr, ptr));
40205 return reg;
40208 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40209 passed in SLOT in case BOUNDS are not passed in a register.
40211 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40212 stored in memory. PTR may be NULL in case SLOT is a memory.
40213 In that case the value of PTR (if required) may be loaded from SLOT.
40215 If SLOT is NULL or a register then SLOT_NO is an integer constant
40216 holding number of the target dependent special slot which should be
40217 used to store BOUNDS. */
40219 static void
40220 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40222 rtx addr;
40224 /* Get address to be used to access Bounds Table. Special slots start
40225 at the location of return address of a called function. */
40226 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40228 /* Load pointer value from a memory if we don't have it. */
40229 if (!ptr)
40231 gcc_assert (MEM_P (slot));
40232 ptr = copy_addr_to_reg (slot);
40235 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40236 if (!register_operand (bounds, BNDmode))
40237 bounds = copy_to_mode_reg (BNDmode, bounds);
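/* bnd{64,32}stx stores BOUNDS, together with the pointer value PTR, into
the Bounds Table entry selected by ADDR. */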
40239 emit_insn (BNDmode == BND64mode
40240 ? gen_bnd64_stx (addr, ptr, bounds)
40241 : gen_bnd32_stx (addr, ptr, bounds));
40244 /* Load and return bounds returned by function in SLOT. */
40246 static rtx
40247 ix86_load_returned_bounds (rtx slot)
40249 rtx res;
40251 gcc_assert (REG_P (slot));
40252 res = gen_reg_rtx (BNDmode);
40253 emit_move_insn (res, slot);
40255 return res;
40258 /* Store BOUNDS returned by function into SLOT. */
40260 static void
40261 ix86_store_returned_bounds (rtx slot, rtx bounds)
40263 gcc_assert (REG_P (slot));
40264 emit_move_insn (slot, bounds);
40267 /* Returns a function decl for a vectorized version of the builtin function
40268 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40269 if it is not available. */
40271 static tree
40272 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40273 tree type_in)
40275 machine_mode in_mode, out_mode;
40276 int in_n, out_n;
40277 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40279 if (TREE_CODE (type_out) != VECTOR_TYPE
40280 || TREE_CODE (type_in) != VECTOR_TYPE
40281 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40282 return NULL_TREE;
40284 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40285 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40286 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40287 in_n = TYPE_VECTOR_SUBPARTS (type_in);
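/* For example, a BUILT_IN_SQRT request with V4DF input and output types
maps to IX86_BUILTIN_SQRTPD256 below. */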
40289 switch (fn)
40291 case BUILT_IN_SQRT:
40292 if (out_mode == DFmode && in_mode == DFmode)
40294 if (out_n == 2 && in_n == 2)
40295 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40296 else if (out_n == 4 && in_n == 4)
40297 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40298 else if (out_n == 8 && in_n == 8)
40299 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40301 break;
40303 case BUILT_IN_EXP2F:
40304 if (out_mode == SFmode && in_mode == SFmode)
40306 if (out_n == 16 && in_n == 16)
40307 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40309 break;
40311 case BUILT_IN_SQRTF:
40312 if (out_mode == SFmode && in_mode == SFmode)
40314 if (out_n == 4 && in_n == 4)
40315 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40316 else if (out_n == 8 && in_n == 8)
40317 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40318 else if (out_n == 16 && in_n == 16)
40319 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40321 break;
40323 case BUILT_IN_IFLOOR:
40324 case BUILT_IN_LFLOOR:
40325 case BUILT_IN_LLFLOOR:
40326 /* The round insn does not trap on denormals. */
40327 if (flag_trapping_math || !TARGET_ROUND)
40328 break;
40330 if (out_mode == SImode && in_mode == DFmode)
40332 if (out_n == 4 && in_n == 2)
40333 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40334 else if (out_n == 8 && in_n == 4)
40335 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40336 else if (out_n == 16 && in_n == 8)
40337 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40339 break;
40341 case BUILT_IN_IFLOORF:
40342 case BUILT_IN_LFLOORF:
40343 case BUILT_IN_LLFLOORF:
40344 /* The round insn does not trap on denormals. */
40345 if (flag_trapping_math || !TARGET_ROUND)
40346 break;
40348 if (out_mode == SImode && in_mode == SFmode)
40350 if (out_n == 4 && in_n == 4)
40351 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40352 else if (out_n == 8 && in_n == 8)
40353 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40355 break;
40357 case BUILT_IN_ICEIL:
40358 case BUILT_IN_LCEIL:
40359 case BUILT_IN_LLCEIL:
40360 /* The round insn does not trap on denormals. */
40361 if (flag_trapping_math || !TARGET_ROUND)
40362 break;
40364 if (out_mode == SImode && in_mode == DFmode)
40366 if (out_n == 4 && in_n == 2)
40367 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40368 else if (out_n == 8 && in_n == 4)
40369 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40370 else if (out_n == 16 && in_n == 8)
40371 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40373 break;
40375 case BUILT_IN_ICEILF:
40376 case BUILT_IN_LCEILF:
40377 case BUILT_IN_LLCEILF:
40378 /* The round insn does not trap on denormals. */
40379 if (flag_trapping_math || !TARGET_ROUND)
40380 break;
40382 if (out_mode == SImode && in_mode == SFmode)
40384 if (out_n == 4 && in_n == 4)
40385 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40386 else if (out_n == 8 && in_n == 8)
40387 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40389 break;
40391 case BUILT_IN_IRINT:
40392 case BUILT_IN_LRINT:
40393 case BUILT_IN_LLRINT:
40394 if (out_mode == SImode && in_mode == DFmode)
40396 if (out_n == 4 && in_n == 2)
40397 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40398 else if (out_n == 8 && in_n == 4)
40399 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40401 break;
40403 case BUILT_IN_IRINTF:
40404 case BUILT_IN_LRINTF:
40405 case BUILT_IN_LLRINTF:
40406 if (out_mode == SImode && in_mode == SFmode)
40408 if (out_n == 4 && in_n == 4)
40409 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40410 else if (out_n == 8 && in_n == 8)
40411 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40413 break;
40415 case BUILT_IN_IROUND:
40416 case BUILT_IN_LROUND:
40417 case BUILT_IN_LLROUND:
40418 /* The round insn does not trap on denormals. */
40419 if (flag_trapping_math || !TARGET_ROUND)
40420 break;
40422 if (out_mode == SImode && in_mode == DFmode)
40424 if (out_n == 4 && in_n == 2)
40425 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40426 else if (out_n == 8 && in_n == 4)
40427 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40428 else if (out_n == 16 && in_n == 8)
40429 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40431 break;
40433 case BUILT_IN_IROUNDF:
40434 case BUILT_IN_LROUNDF:
40435 case BUILT_IN_LLROUNDF:
40436 /* The round insn does not trap on denormals. */
40437 if (flag_trapping_math || !TARGET_ROUND)
40438 break;
40440 if (out_mode == SImode && in_mode == SFmode)
40442 if (out_n == 4 && in_n == 4)
40443 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40444 else if (out_n == 8 && in_n == 8)
40445 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40447 break;
40449 case BUILT_IN_COPYSIGN:
40450 if (out_mode == DFmode && in_mode == DFmode)
40452 if (out_n == 2 && in_n == 2)
40453 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40454 else if (out_n == 4 && in_n == 4)
40455 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40456 else if (out_n == 8 && in_n == 8)
40457 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40459 break;
40461 case BUILT_IN_COPYSIGNF:
40462 if (out_mode == SFmode && in_mode == SFmode)
40464 if (out_n == 4 && in_n == 4)
40465 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40466 else if (out_n == 8 && in_n == 8)
40467 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40468 else if (out_n == 16 && in_n == 16)
40469 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40471 break;
40473 case BUILT_IN_FLOOR:
40474 /* The round insn does not trap on denormals. */
40475 if (flag_trapping_math || !TARGET_ROUND)
40476 break;
40478 if (out_mode == DFmode && in_mode == DFmode)
40480 if (out_n == 2 && in_n == 2)
40481 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40482 else if (out_n == 4 && in_n == 4)
40483 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40485 break;
40487 case BUILT_IN_FLOORF:
40488 /* The round insn does not trap on denormals. */
40489 if (flag_trapping_math || !TARGET_ROUND)
40490 break;
40492 if (out_mode == SFmode && in_mode == SFmode)
40494 if (out_n == 4 && in_n == 4)
40495 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40496 else if (out_n == 8 && in_n == 8)
40497 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40499 break;
40501 case BUILT_IN_CEIL:
40502 /* The round insn does not trap on denormals. */
40503 if (flag_trapping_math || !TARGET_ROUND)
40504 break;
40506 if (out_mode == DFmode && in_mode == DFmode)
40508 if (out_n == 2 && in_n == 2)
40509 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40510 else if (out_n == 4 && in_n == 4)
40511 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40513 break;
40515 case BUILT_IN_CEILF:
40516 /* The round insn does not trap on denormals. */
40517 if (flag_trapping_math || !TARGET_ROUND)
40518 break;
40520 if (out_mode == SFmode && in_mode == SFmode)
40522 if (out_n == 4 && in_n == 4)
40523 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40524 else if (out_n == 8 && in_n == 8)
40525 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40527 break;
40529 case BUILT_IN_TRUNC:
40530 /* The round insn does not trap on denormals. */
40531 if (flag_trapping_math || !TARGET_ROUND)
40532 break;
40534 if (out_mode == DFmode && in_mode == DFmode)
40536 if (out_n == 2 && in_n == 2)
40537 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40538 else if (out_n == 4 && in_n == 4)
40539 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40541 break;
40543 case BUILT_IN_TRUNCF:
40544 /* The round insn does not trap on denormals. */
40545 if (flag_trapping_math || !TARGET_ROUND)
40546 break;
40548 if (out_mode == SFmode && in_mode == SFmode)
40550 if (out_n == 4 && in_n == 4)
40551 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40552 else if (out_n == 8 && in_n == 8)
40553 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40555 break;
40557 case BUILT_IN_RINT:
40558 /* The round insn does not trap on denormals. */
40559 if (flag_trapping_math || !TARGET_ROUND)
40560 break;
40562 if (out_mode == DFmode && in_mode == DFmode)
40564 if (out_n == 2 && in_n == 2)
40565 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40566 else if (out_n == 4 && in_n == 4)
40567 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40569 break;
40571 case BUILT_IN_RINTF:
40572 /* The round insn does not trap on denormals. */
40573 if (flag_trapping_math || !TARGET_ROUND)
40574 break;
40576 if (out_mode == SFmode && in_mode == SFmode)
40578 if (out_n == 4 && in_n == 4)
40579 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40580 else if (out_n == 8 && in_n == 8)
40581 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40583 break;
40585 case BUILT_IN_ROUND:
40586 /* The round insn does not trap on denormals. */
40587 if (flag_trapping_math || !TARGET_ROUND)
40588 break;
40590 if (out_mode == DFmode && in_mode == DFmode)
40592 if (out_n == 2 && in_n == 2)
40593 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40594 else if (out_n == 4 && in_n == 4)
40595 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40597 break;
40599 case BUILT_IN_ROUNDF:
40600 /* The round insn does not trap on denormals. */
40601 if (flag_trapping_math || !TARGET_ROUND)
40602 break;
40604 if (out_mode == SFmode && in_mode == SFmode)
40606 if (out_n == 4 && in_n == 4)
40607 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40608 else if (out_n == 8 && in_n == 8)
40609 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40611 break;
40613 case BUILT_IN_FMA:
40614 if (out_mode == DFmode && in_mode == DFmode)
40616 if (out_n == 2 && in_n == 2)
40617 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40618 if (out_n == 4 && in_n == 4)
40619 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40621 break;
40623 case BUILT_IN_FMAF:
40624 if (out_mode == SFmode && in_mode == SFmode)
40626 if (out_n == 4 && in_n == 4)
40627 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40628 if (out_n == 8 && in_n == 8)
40629 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40631 break;
40633 default:
40634 break;
40637 /* Dispatch to a handler for a vectorization library. */
40638 if (ix86_veclib_handler)
40639 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40640 type_in);
40642 return NULL_TREE;
40645 /* Handler for an SVML-style interface to
40646 a library with vectorized intrinsics. */
40648 static tree
40649 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40651 char name[20];
40652 tree fntype, new_fndecl, args;
40653 unsigned arity;
40654 const char *bname;
40655 machine_mode el_mode, in_mode;
40656 int n, in_n;
40658 /* The SVML is suitable for unsafe math only. */
40659 if (!flag_unsafe_math_optimizations)
40660 return NULL_TREE;
40662 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40663 n = TYPE_VECTOR_SUBPARTS (type_out);
40664 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40665 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40666 if (el_mode != in_mode
40667 || n != in_n)
40668 return NULL_TREE;
40670 switch (fn)
40672 case BUILT_IN_EXP:
40673 case BUILT_IN_LOG:
40674 case BUILT_IN_LOG10:
40675 case BUILT_IN_POW:
40676 case BUILT_IN_TANH:
40677 case BUILT_IN_TAN:
40678 case BUILT_IN_ATAN:
40679 case BUILT_IN_ATAN2:
40680 case BUILT_IN_ATANH:
40681 case BUILT_IN_CBRT:
40682 case BUILT_IN_SINH:
40683 case BUILT_IN_SIN:
40684 case BUILT_IN_ASINH:
40685 case BUILT_IN_ASIN:
40686 case BUILT_IN_COSH:
40687 case BUILT_IN_COS:
40688 case BUILT_IN_ACOSH:
40689 case BUILT_IN_ACOS:
40690 if (el_mode != DFmode || n != 2)
40691 return NULL_TREE;
40692 break;
40694 case BUILT_IN_EXPF:
40695 case BUILT_IN_LOGF:
40696 case BUILT_IN_LOG10F:
40697 case BUILT_IN_POWF:
40698 case BUILT_IN_TANHF:
40699 case BUILT_IN_TANF:
40700 case BUILT_IN_ATANF:
40701 case BUILT_IN_ATAN2F:
40702 case BUILT_IN_ATANHF:
40703 case BUILT_IN_CBRTF:
40704 case BUILT_IN_SINHF:
40705 case BUILT_IN_SINF:
40706 case BUILT_IN_ASINHF:
40707 case BUILT_IN_ASINF:
40708 case BUILT_IN_COSHF:
40709 case BUILT_IN_COSF:
40710 case BUILT_IN_ACOSHF:
40711 case BUILT_IN_ACOSF:
40712 if (el_mode != SFmode || n != 4)
40713 return NULL_TREE;
40714 break;
40716 default:
40717 return NULL_TREE;
40720 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40722 if (fn == BUILT_IN_LOGF)
40723 strcpy (name, "vmlsLn4");
40724 else if (fn == BUILT_IN_LOG)
40725 strcpy (name, "vmldLn2");
40726 else if (n == 4)
40728 sprintf (name, "vmls%s", bname+10);
40729 name[strlen (name)-1] = '4';
40731 else
40732 sprintf (name, "vmld%s2", bname+10);
40734 /* Convert to uppercase. */
40735 name[4] &= ~0x20;
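/* For example, BUILT_IN_SINF becomes "vmlsSin4" and BUILT_IN_SIN becomes
"vmldSin2"; the log functions use the special "Ln" spellings above. */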
40737 arity = 0;
40738 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40739 args;
40740 args = TREE_CHAIN (args))
40741 arity++;
40743 if (arity == 1)
40744 fntype = build_function_type_list (type_out, type_in, NULL);
40745 else
40746 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40748 /* Build a function declaration for the vectorized function. */
40749 new_fndecl = build_decl (BUILTINS_LOCATION,
40750 FUNCTION_DECL, get_identifier (name), fntype);
40751 TREE_PUBLIC (new_fndecl) = 1;
40752 DECL_EXTERNAL (new_fndecl) = 1;
40753 DECL_IS_NOVOPS (new_fndecl) = 1;
40754 TREE_READONLY (new_fndecl) = 1;
40756 return new_fndecl;
40759 /* Handler for an ACML-style interface to
40760 a library with vectorized intrinsics. */
40762 static tree
40763 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40765 char name[20] = "__vr.._";
40766 tree fntype, new_fndecl, args;
40767 unsigned arity;
40768 const char *bname;
40769 machine_mode el_mode, in_mode;
40770 int n, in_n;
40772 /* The ACML is 64-bit only and suitable for unsafe math only, as
40773 it does not correctly support parts of IEEE with the required
40774 precision such as denormals. */
40775 if (!TARGET_64BIT
40776 || !flag_unsafe_math_optimizations)
40777 return NULL_TREE;
40779 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40780 n = TYPE_VECTOR_SUBPARTS (type_out);
40781 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40782 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40783 if (el_mode != in_mode
40784 || n != in_n)
40785 return NULL_TREE;
40787 switch (fn)
40789 case BUILT_IN_SIN:
40790 case BUILT_IN_COS:
40791 case BUILT_IN_EXP:
40792 case BUILT_IN_LOG:
40793 case BUILT_IN_LOG2:
40794 case BUILT_IN_LOG10:
40795 name[4] = 'd';
40796 name[5] = '2';
40797 if (el_mode != DFmode
40798 || n != 2)
40799 return NULL_TREE;
40800 break;
40802 case BUILT_IN_SINF:
40803 case BUILT_IN_COSF:
40804 case BUILT_IN_EXPF:
40805 case BUILT_IN_POWF:
40806 case BUILT_IN_LOGF:
40807 case BUILT_IN_LOG2F:
40808 case BUILT_IN_LOG10F:
40809 name[4] = 's';
40810 name[5] = '4';
40811 if (el_mode != SFmode
40812 || n != 4)
40813 return NULL_TREE;
40814 break;
40816 default:
40817 return NULL_TREE;
40820 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40821 sprintf (name + 7, "%s", bname+10);
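/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
"__vrs4_sinf". */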
40823 arity = 0;
40824 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40825 args;
40826 args = TREE_CHAIN (args))
40827 arity++;
40829 if (arity == 1)
40830 fntype = build_function_type_list (type_out, type_in, NULL);
40831 else
40832 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40834 /* Build a function declaration for the vectorized function. */
40835 new_fndecl = build_decl (BUILTINS_LOCATION,
40836 FUNCTION_DECL, get_identifier (name), fntype);
40837 TREE_PUBLIC (new_fndecl) = 1;
40838 DECL_EXTERNAL (new_fndecl) = 1;
40839 DECL_IS_NOVOPS (new_fndecl) = 1;
40840 TREE_READONLY (new_fndecl) = 1;
40842 return new_fndecl;
40845 /* Returns a decl of a function that implements gather load with
40846 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40847 Return NULL_TREE if it is not available. */
40849 static tree
40850 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40851 const_tree index_type, int scale)
40853 bool si;
40854 enum ix86_builtins code;
40856 if (! TARGET_AVX2)
40857 return NULL_TREE;
40859 if ((TREE_CODE (index_type) != INTEGER_TYPE
40860 && !POINTER_TYPE_P (index_type))
40861 || (TYPE_MODE (index_type) != SImode
40862 && TYPE_MODE (index_type) != DImode))
40863 return NULL_TREE;
40865 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40866 return NULL_TREE;
40868 /* The v*gather* insn sign-extends the index to pointer mode. */
40869 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40870 && TYPE_UNSIGNED (index_type))
40871 return NULL_TREE;
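/* The VSIB addressing mode used by v*gather* only encodes the power-of-two
scales 1, 2, 4 and 8. */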
40873 if (scale <= 0
40874 || scale > 8
40875 || (scale & (scale - 1)) != 0)
40876 return NULL_TREE;
40878 si = TYPE_MODE (index_type) == SImode;
40879 switch (TYPE_MODE (mem_vectype))
40881 case V2DFmode:
40882 if (TARGET_AVX512VL)
40883 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40884 else
40885 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40886 break;
40887 case V4DFmode:
40888 if (TARGET_AVX512VL)
40889 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40890 else
40891 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40892 break;
40893 case V2DImode:
40894 if (TARGET_AVX512VL)
40895 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40896 else
40897 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40898 break;
40899 case V4DImode:
40900 if (TARGET_AVX512VL)
40901 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40902 else
40903 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40904 break;
40905 case V4SFmode:
40906 if (TARGET_AVX512VL)
40907 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40908 else
40909 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40910 break;
40911 case V8SFmode:
40912 if (TARGET_AVX512VL)
40913 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40914 else
40915 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40916 break;
40917 case V4SImode:
40918 if (TARGET_AVX512VL)
40919 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40920 else
40921 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40922 break;
40923 case V8SImode:
40924 if (TARGET_AVX512VL)
40925 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40926 else
40927 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
40928 break;
40929 case V8DFmode:
40930 if (TARGET_AVX512F)
40931 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
40932 else
40933 return NULL_TREE;
40934 break;
40935 case V8DImode:
40936 if (TARGET_AVX512F)
40937 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
40938 else
40939 return NULL_TREE;
40940 break;
40941 case V16SFmode:
40942 if (TARGET_AVX512F)
40943 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
40944 else
40945 return NULL_TREE;
40946 break;
40947 case V16SImode:
40948 if (TARGET_AVX512F)
40949 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
40950 else
40951 return NULL_TREE;
40952 break;
40953 default:
40954 return NULL_TREE;
40957 return ix86_get_builtin (code);
40960 /* Returns a code for a target-specific builtin that implements
40961 reciprocal of the function, or NULL_TREE if not available. */
40963 static tree
40964 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
40966 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
40967 && flag_finite_math_only && !flag_trapping_math
40968 && flag_unsafe_math_optimizations))
40969 return NULL_TREE;
40971 if (md_fn)
40972 /* Machine dependent builtins. */
40973 switch (fn)
40975 /* Vectorized version of sqrt to rsqrt conversion. */
40976 case IX86_BUILTIN_SQRTPS_NR:
40977 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
40979 case IX86_BUILTIN_SQRTPS_NR256:
40980 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
40982 default:
40983 return NULL_TREE;
40985 else
40986 /* Normal builtins. */
40987 switch (fn)
40989 /* Sqrt to rsqrt conversion. */
40990 case BUILT_IN_SQRTF:
40991 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
40993 default:
40994 return NULL_TREE;
40998 /* Helper for avx_vpermilps256_operand et al. This is also used by
40999 the expansion functions to turn the parallel back into a mask.
41000 The return value is 0 for no match and the imm8+1 for a match. */
41003 avx_vpermilp_parallel (rtx par, machine_mode mode)
41005 unsigned i, nelt = GET_MODE_NUNITS (mode);
41006 unsigned mask = 0;
41007 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41009 if (XVECLEN (par, 0) != (int) nelt)
41010 return 0;
41012 /* Validate that all of the elements are constants, and not totally
41013 out of range. Copy the data into an integral array to make the
41014 subsequent checks easier. */
41015 for (i = 0; i < nelt; ++i)
41017 rtx er = XVECEXP (par, 0, i);
41018 unsigned HOST_WIDE_INT ei;
41020 if (!CONST_INT_P (er))
41021 return 0;
41022 ei = INTVAL (er);
41023 if (ei >= nelt)
41024 return 0;
41025 ipar[i] = ei;
41028 switch (mode)
41030 case V8DFmode:
41031 /* In the 512-bit DFmode case, we can only move elements within
41032 a 128-bit lane. First fill the second part of the mask,
41033 then fallthru. */
41034 for (i = 4; i < 6; ++i)
41036 if (ipar[i] < 4 || ipar[i] >= 6)
41037 return 0;
41038 mask |= (ipar[i] - 4) << i;
41040 for (i = 6; i < 8; ++i)
41042 if (ipar[i] < 6)
41043 return 0;
41044 mask |= (ipar[i] - 6) << i;
41046 /* FALLTHRU */
41048 case V4DFmode:
41049 /* In the 256-bit DFmode case, we can only move elements within
41050 a 128-bit lane. */
41051 for (i = 0; i < 2; ++i)
41053 if (ipar[i] >= 2)
41054 return 0;
41055 mask |= ipar[i] << i;
41057 for (i = 2; i < 4; ++i)
41059 if (ipar[i] < 2)
41060 return 0;
41061 mask |= (ipar[i] - 2) << i;
41063 break;
41065 case V16SFmode:
41066 /* In 512 bit SFmode case, permutation in the upper 256 bits
41067 must mirror the permutation in the lower 256-bits. */
41068 for (i = 0; i < 8; ++i)
41069 if (ipar[i] + 8 != ipar[i + 8])
41070 return 0;
41071 /* FALLTHRU */
41073 case V8SFmode:
41074 /* In 256 bit SFmode case, we have full freedom of
41075 movement within the low 128-bit lane, but the high 128-bit
41076 lane must mirror the exact same pattern. */
41077 for (i = 0; i < 4; ++i)
41078 if (ipar[i] + 4 != ipar[i + 4])
41079 return 0;
41080 nelt = 4;
41081 /* FALLTHRU */
41083 case V2DFmode:
41084 case V4SFmode:
41085 /* In the 128-bit case, we've full freedom in the placement of
41086 the elements from the source operand. */
41087 for (i = 0; i < nelt; ++i)
41088 mask |= ipar[i] << (i * (nelt / 2));
41089 break;
41091 default:
41092 gcc_unreachable ();
41095 /* Make sure success has a non-zero value by adding one. */
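/* For example, in V4SFmode the parallel (2 3 0 1) encodes imm8 0x4e, so
0x4f is returned. */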
41096 return mask + 1;
41099 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41100 the expansion functions to turn the parallel back into a mask.
41101 The return value is 0 for no match and the imm8+1 for a match. */
41104 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41106 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41107 unsigned mask = 0;
41108 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41110 if (XVECLEN (par, 0) != (int) nelt)
41111 return 0;
41113 /* Validate that all of the elements are constants, and not totally
41114 out of range. Copy the data into an integral array to make the
41115 subsequent checks easier. */
41116 for (i = 0; i < nelt; ++i)
41118 rtx er = XVECEXP (par, 0, i);
41119 unsigned HOST_WIDE_INT ei;
41121 if (!CONST_INT_P (er))
41122 return 0;
41123 ei = INTVAL (er);
41124 if (ei >= 2 * nelt)
41125 return 0;
41126 ipar[i] = ei;
41129 /* Validate that the halves of the permute are halves. */
41130 for (i = 0; i < nelt2 - 1; ++i)
41131 if (ipar[i] + 1 != ipar[i + 1])
41132 return 0;
41133 for (i = nelt2; i < nelt - 1; ++i)
41134 if (ipar[i] + 1 != ipar[i + 1])
41135 return 0;
41137 /* Reconstruct the mask. */
41138 for (i = 0; i < 2; ++i)
41140 unsigned e = ipar[i * nelt2];
41141 if (e % nelt2)
41142 return 0;
41143 e /= nelt2;
41144 mask |= e << (i * 4);
41147 /* Make sure success has a non-zero value by adding one. */
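/* For example, in V4DFmode the parallel (4 5 2 3) encodes imm8 0x12, so
0x13 is returned. */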
41148 return mask + 1;
41151 /* Return a register priority for hard reg REGNO. */
41152 static int
41153 ix86_register_priority (int hard_regno)
41155 /* ebp and r13 as the base always want a displacement, and r12 as the
41156 base always wants an index, so discourage their use in an
41157 address. */
41158 if (hard_regno == R12_REG || hard_regno == R13_REG)
41159 return 0;
41160 if (hard_regno == BP_REG)
41161 return 1;
41162 /* New x86-64 int registers result in bigger code size. Discourage
41163 them. */
41164 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41165 return 2;
41166 /* New x86-64 SSE registers result in bigger code size. Discourage
41167 them. */
41168 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41169 return 2;
41170 /* Usage of AX register results in smaller code. Prefer it. */
41171 if (hard_regno == AX_REG)
41172 return 4;
41173 return 3;
41176 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41178 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41179 QImode must go into class Q_REGS.
41180 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41181 movdf to do mem-to-mem moves through integer regs. */
41183 static reg_class_t
41184 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41186 machine_mode mode = GET_MODE (x);
41188 /* We're only allowed to return a subclass of CLASS. Many of the
41189 following checks fail for NO_REGS, so eliminate that early. */
41190 if (regclass == NO_REGS)
41191 return NO_REGS;
41193 /* All classes can load zeros. */
41194 if (x == CONST0_RTX (mode))
41195 return regclass;
41197 /* Force constants into memory if we are loading a (nonzero) constant into
41198 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41199 instructions to load from a constant. */
41200 if (CONSTANT_P (x)
41201 && (MAYBE_MMX_CLASS_P (regclass)
41202 || MAYBE_SSE_CLASS_P (regclass)
41203 || MAYBE_MASK_CLASS_P (regclass)))
41204 return NO_REGS;
41206 /* Prefer SSE regs only, if we can use them for math. */
41207 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41208 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41210 /* Floating-point constants need more complex checks. */
41211 if (CONST_DOUBLE_P (x))
41213 /* General regs can load everything. */
41214 if (reg_class_subset_p (regclass, GENERAL_REGS))
41215 return regclass;
41217 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41218 zero above. We only want to wind up preferring 80387 registers if
41219 we plan on doing computation with them. */
41220 if (TARGET_80387
41221 && standard_80387_constant_p (x) > 0)
41223 /* Limit class to non-sse. */
41224 if (regclass == FLOAT_SSE_REGS)
41225 return FLOAT_REGS;
41226 if (regclass == FP_TOP_SSE_REGS)
41227 return FP_TOP_REG;
41228 if (regclass == FP_SECOND_SSE_REGS)
41229 return FP_SECOND_REG;
41230 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41231 return regclass;
41234 return NO_REGS;
41237 /* Generally when we see PLUS here, it's the function invariant
41238 (plus soft-fp const_int). Which can only be computed into general
41239 regs. */
41240 if (GET_CODE (x) == PLUS)
41241 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41243 /* QImode constants are easy to load, but non-constant QImode data
41244 must go into Q_REGS. */
41245 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41247 if (reg_class_subset_p (regclass, Q_REGS))
41248 return regclass;
41249 if (reg_class_subset_p (Q_REGS, regclass))
41250 return Q_REGS;
41251 return NO_REGS;
41254 return regclass;
41257 /* Discourage putting floating-point values in SSE registers unless
41258 SSE math is being used, and likewise for the 387 registers. */
41259 static reg_class_t
41260 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41262 machine_mode mode = GET_MODE (x);
41264 /* Restrict the output reload class to the register bank that we are doing
41265 math on. If we would like not to return a subset of CLASS, reject this
41266 alternative: if reload cannot do this, it will still use its choice. */
41267 mode = GET_MODE (x);
41268 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41269 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41271 if (X87_FLOAT_MODE_P (mode))
41273 if (regclass == FP_TOP_SSE_REGS)
41274 return FP_TOP_REG;
41275 else if (regclass == FP_SECOND_SSE_REGS)
41276 return FP_SECOND_REG;
41277 else
41278 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41281 return regclass;
41284 static reg_class_t
41285 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41286 machine_mode mode, secondary_reload_info *sri)
41288 /* Double-word spills from general registers to non-offsettable memory
41289 references (zero-extended addresses) require special handling. */
41290 if (TARGET_64BIT
41291 && MEM_P (x)
41292 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41293 && INTEGER_CLASS_P (rclass)
41294 && !offsettable_memref_p (x))
41296 sri->icode = (in_p
41297 ? CODE_FOR_reload_noff_load
41298 : CODE_FOR_reload_noff_store);
41299 /* Add the cost of moving address to a temporary. */
41300 sri->extra_cost = 1;
41302 return NO_REGS;
41305 /* QImode spills from non-QI registers require
41306 intermediate register on 32bit targets. */
41307 if (mode == QImode
41308 && (MAYBE_MASK_CLASS_P (rclass)
41309 || (!TARGET_64BIT && !in_p
41310 && INTEGER_CLASS_P (rclass)
41311 && MAYBE_NON_Q_CLASS_P (rclass))))
41313 int regno;
41315 if (REG_P (x))
41316 regno = REGNO (x);
41317 else
41318 regno = -1;
41320 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41321 regno = true_regnum (x);
41323 /* Return Q_REGS if the operand is in memory. */
41324 if (regno == -1)
41325 return Q_REGS;
41328 /* This condition handles corner case where an expression involving
41329 pointers gets vectorized. We're trying to use the address of a
41330 stack slot as a vector initializer.
41332 (set (reg:V2DI 74 [ vect_cst_.2 ])
41333 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41335 Eventually frame gets turned into sp+offset like this:
41337 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41338 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41339 (const_int 392 [0x188]))))
41341 That later gets turned into:
41343 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41344 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41345 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41347 We'll have the following reload recorded:
41349 Reload 0: reload_in (DI) =
41350 (plus:DI (reg/f:DI 7 sp)
41351 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41352 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41353 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41354 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41355 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41356 reload_reg_rtx: (reg:V2DI 22 xmm1)
41358 Which isn't going to work since SSE instructions can't handle scalar
41359 additions. Returning GENERAL_REGS forces the addition into integer
41360 register and reload can handle subsequent reloads without problems. */
41362 if (in_p && GET_CODE (x) == PLUS
41363 && SSE_CLASS_P (rclass)
41364 && SCALAR_INT_MODE_P (mode))
41365 return GENERAL_REGS;
41367 return NO_REGS;
41370 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41372 static bool
41373 ix86_class_likely_spilled_p (reg_class_t rclass)
41375 switch (rclass)
41377 case AREG:
41378 case DREG:
41379 case CREG:
41380 case BREG:
41381 case AD_REGS:
41382 case SIREG:
41383 case DIREG:
41384 case SSE_FIRST_REG:
41385 case FP_TOP_REG:
41386 case FP_SECOND_REG:
41387 case BND_REGS:
41388 return true;
41390 default:
41391 break;
41394 return false;
41397 /* If we are copying between general and FP registers, we need a memory
41398 location. The same is true for SSE and MMX registers.
41400 To optimize register_move_cost performance, allow inline variant.
41402 The macro can't work reliably when one of the CLASSES is class containing
41403 registers from multiple units (SSE, MMX, integer). We avoid this by never
41404 combining those units in single alternative in the machine description.
41405 Ensure that this constraint holds to avoid unexpected surprises.
41407 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41408 enforce these sanity checks. */
41410 static inline bool
41411 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41412 machine_mode mode, int strict)
41414 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41415 return false;
41416 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41417 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41418 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41419 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41420 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41421 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41423 gcc_assert (!strict || lra_in_progress);
41424 return true;
41427 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41428 return true;
41430 /* Between mask and general, we have moves no larger than word size. */
41431 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41432 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41433 return true;
41435 /* ??? This is a lie. We do have moves between mmx/general, and for
41436 mmx/sse2. But by saying we need secondary memory we discourage the
41437 register allocator from using the mmx registers unless needed. */
41438 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41439 return true;
41441 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41443 /* SSE1 doesn't have any direct moves from other classes. */
41444 if (!TARGET_SSE2)
41445 return true;
41447 /* If the target says that inter-unit moves are more expensive
41448 than moving through memory, then don't generate them. */
41449 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41450 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41451 return true;
41453 /* Between SSE and general, we have moves no larger than word size. */
41454 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41455 return true;
41458 return false;
41461 bool
41462 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41463 machine_mode mode, int strict)
41465 return inline_secondary_memory_needed (class1, class2, mode, strict);
41468 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41470 On the 80386, this is the size of MODE in words,
41471 except in the FP regs, where a single reg is always enough. */
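/* For example, DImode needs two general registers on a 32-bit target but
only one on a 64-bit target, while a complex mode needs two FP or SSE regs
and any other mode just one. */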
41473 static unsigned char
41474 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41476 if (MAYBE_INTEGER_CLASS_P (rclass))
41478 if (mode == XFmode)
41479 return (TARGET_64BIT ? 2 : 3);
41480 else if (mode == XCmode)
41481 return (TARGET_64BIT ? 4 : 6);
41482 else
41483 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41485 else
41487 if (COMPLEX_MODE_P (mode))
41488 return 2;
41489 else
41490 return 1;
41494 /* Return true if the registers in CLASS cannot represent the change from
41495 modes FROM to TO. */
41497 bool
41498 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41499 enum reg_class regclass)
41501 if (from == to)
41502 return false;
41504 /* x87 registers can't do subreg at all, as all values are reformatted
41505 to extended precision. */
41506 if (MAYBE_FLOAT_CLASS_P (regclass))
41507 return true;
41509 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41511 /* Vector registers do not support QI or HImode loads. If we don't
41512 disallow a change to these modes, reload will assume it's ok to
41513 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41514 the vec_dupv4hi pattern. */
41515 if (GET_MODE_SIZE (from) < 4)
41516 return true;
41519 return false;
41522 /* Return the cost of moving data of mode M between a
41523 register and memory. A value of 2 is the default; this cost is
41524 relative to those in `REGISTER_MOVE_COST'.
41526 This function is used extensively by register_move_cost, which is used to
41527 build tables at startup; make it inline in this case.
41528 When IN is 2, return maximum of in and out move cost.
41530 If moving between registers and memory is more expensive than
41531 between two registers, you should define this macro to express the
41532 relative cost.
41534 Also model the increased cost of moving QImode registers in non-
41535 Q_REGS classes.
41537 static inline int
41538 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41539 int in)
41541 int cost;
41542 if (FLOAT_CLASS_P (regclass))
41544 int index;
41545 switch (mode)
41547 case SFmode:
41548 index = 0;
41549 break;
41550 case DFmode:
41551 index = 1;
41552 break;
41553 case XFmode:
41554 index = 2;
41555 break;
41556 default:
41557 return 100;
41559 if (in == 2)
41560 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41561 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41563 if (SSE_CLASS_P (regclass))
41565 int index;
41566 switch (GET_MODE_SIZE (mode))
41568 case 4:
41569 index = 0;
41570 break;
41571 case 8:
41572 index = 1;
41573 break;
41574 case 16:
41575 index = 2;
41576 break;
41577 default:
41578 return 100;
41580 if (in == 2)
41581 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41582 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41584 if (MMX_CLASS_P (regclass))
41586 int index;
41587 switch (GET_MODE_SIZE (mode))
41589 case 4:
41590 index = 0;
41591 break;
41592 case 8:
41593 index = 1;
41594 break;
41595 default:
41596 return 100;
41598 if (in == 2)
41599 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41600 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41602 switch (GET_MODE_SIZE (mode))
41604 case 1:
41605 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41607 if (!in)
41608 return ix86_cost->int_store[0];
41609 if (TARGET_PARTIAL_REG_DEPENDENCY
41610 && optimize_function_for_speed_p (cfun))
41611 cost = ix86_cost->movzbl_load;
41612 else
41613 cost = ix86_cost->int_load[0];
41614 if (in == 2)
41615 return MAX (cost, ix86_cost->int_store[0]);
41616 return cost;
41618 else
41620 if (in == 2)
41621 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41622 if (in)
41623 return ix86_cost->movzbl_load;
41624 else
41625 return ix86_cost->int_store[0] + 4;
41627 break;
41628 case 2:
41629 if (in == 2)
41630 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41631 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41632 default:
41633 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41634 if (mode == TFmode)
41635 mode = XFmode;
41636 if (in == 2)
41637 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41638 else if (in)
41639 cost = ix86_cost->int_load[2];
41640 else
41641 cost = ix86_cost->int_store[2];
41642 return (cost * (((int) GET_MODE_SIZE (mode)
41643 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41647 static int
41648 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41649 bool in)
41651 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41655 /* Return the cost of moving data from a register in class CLASS1 to
41656 one in class CLASS2.
41658 It is not required that the cost always equal 2 when FROM is the same as TO;
41659 on some machines it is expensive to move between registers if they are not
41660 general registers. */
41662 static int
41663 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41664 reg_class_t class2_i)
41666 enum reg_class class1 = (enum reg_class) class1_i;
41667 enum reg_class class2 = (enum reg_class) class2_i;
41669 /* In case we require secondary memory, compute cost of the store followed
41670 by load. In order to avoid bad register allocation choices, we need
41671 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41673 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41675 int cost = 1;
41677 cost += inline_memory_move_cost (mode, class1, 2);
41678 cost += inline_memory_move_cost (mode, class2, 2);
41680 /* When copying from a general purpose register we may emit multiple
41681 stores followed by a single load, causing a memory size mismatch stall.
41682 Count this as an arbitrarily high cost of 20. */
41683 if (targetm.class_max_nregs (class1, mode)
41684 > targetm.class_max_nregs (class2, mode))
41685 cost += 20;
41687 /* In the case of FP/MMX moves, the registers actually overlap, and we
41688 have to switch modes in order to treat them differently. */
41689 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41690 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41691 cost += 20;
41693 return cost;
41696 /* Moves between SSE/MMX and integer unit are expensive. */
41697 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41698 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41700 /* ??? By keeping the returned value relatively high, we limit the number
41701 of moves between integer and MMX/SSE registers for all targets.
41702 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
41703 where integer modes in MMX/SSE registers are not tieable
41704 because of missing QImode and HImode moves to, from or between
41705 MMX/SSE registers. */
41706 return MAX (8, ix86_cost->mmxsse_to_integer);
41708 if (MAYBE_FLOAT_CLASS_P (class1))
41709 return ix86_cost->fp_move;
41710 if (MAYBE_SSE_CLASS_P (class1))
41711 return ix86_cost->sse_move;
41712 if (MAYBE_MMX_CLASS_P (class1))
41713 return ix86_cost->mmx_move;
41714 return 2;
41717 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41718 MODE. */
41720 bool
41721 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41723 /* Flags and only flags can only hold CCmode values. */
41724 if (CC_REGNO_P (regno))
41725 return GET_MODE_CLASS (mode) == MODE_CC;
41726 if (GET_MODE_CLASS (mode) == MODE_CC
41727 || GET_MODE_CLASS (mode) == MODE_RANDOM
41728 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41729 return false;
41730 if (STACK_REGNO_P (regno))
41731 return VALID_FP_MODE_P (mode);
41732 if (MASK_REGNO_P (regno))
41733 return (VALID_MASK_REG_MODE (mode)
41734 || (TARGET_AVX512BW
41735 && VALID_MASK_AVX512BW_MODE (mode)));
41736 if (BND_REGNO_P (regno))
41737 return VALID_BND_REG_MODE (mode);
41738 if (SSE_REGNO_P (regno))
41740 /* We implement the move patterns for all vector modes into and
41741 out of SSE registers, even when no operation instructions
41742 are available. */
41744 /* For AVX-512 we allow, regardless of regno:
41745 - XI mode
41746 - any of 512-bit wide vector mode
41747 - any scalar mode. */
41748 if (TARGET_AVX512F
41749 && (mode == XImode
41750 || VALID_AVX512F_REG_MODE (mode)
41751 || VALID_AVX512F_SCALAR_MODE (mode)))
41752 return true;
41754 /* TODO check for QI/HI scalars. */
41755 /* AVX512VL allows use of SSE regs 16+ for 128/256-bit modes. */
41756 if (TARGET_AVX512VL
41757 && (mode == OImode
41758 || mode == TImode
41759 || VALID_AVX256_REG_MODE (mode)
41760 || VALID_AVX512VL_128_REG_MODE (mode)))
41761 return true;
41763 /* xmm16-xmm31 are only available for AVX-512. */
41764 if (EXT_REX_SSE_REGNO_P (regno))
41765 return false;
41767 /* OImode and AVX modes are available only when AVX is enabled. */
41768 return ((TARGET_AVX
41769 && VALID_AVX256_REG_OR_OI_MODE (mode))
41770 || VALID_SSE_REG_MODE (mode)
41771 || VALID_SSE2_REG_MODE (mode)
41772 || VALID_MMX_REG_MODE (mode)
41773 || VALID_MMX_REG_MODE_3DNOW (mode));
41775 if (MMX_REGNO_P (regno))
41777 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41778 so if the register is available at all, then we can move data of
41779 the given mode into or out of it. */
41780 return (VALID_MMX_REG_MODE (mode)
41781 || VALID_MMX_REG_MODE_3DNOW (mode));
41784 if (mode == QImode)
41786 /* Take care for QImode values - they can be in non-QI regs,
41787 but then they do cause partial register stalls. */
41788 if (ANY_QI_REGNO_P (regno))
41789 return true;
41790 if (!TARGET_PARTIAL_REG_STALL)
41791 return true;
41792 /* LRA checks if the hard register is OK for the given mode.
41793 QImode values can live in non-QI regs, so we allow all
41794 registers here. */
41795 if (lra_in_progress)
41796 return true;
41797 return !can_create_pseudo_p ();
41799 /* We handle both integer and floats in the general purpose registers. */
41800 else if (VALID_INT_MODE_P (mode))
41801 return true;
41802 else if (VALID_FP_MODE_P (mode))
41803 return true;
41804 else if (VALID_DFP_MODE_P (mode))
41805 return true;
41806 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41807 on to use that value in smaller contexts, this can easily force a
41808 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41809 supporting DImode, allow it. */
41810 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41811 return true;
41813 return false;
41816 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41817 tieable integer mode. */
41819 static bool
41820 ix86_tieable_integer_mode_p (machine_mode mode)
41822 switch (mode)
41824 case HImode:
41825 case SImode:
41826 return true;
41828 case QImode:
41829 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41831 case DImode:
41832 return TARGET_64BIT;
41834 default:
41835 return false;
41839 /* Return true if MODE1 is accessible in a register that can hold MODE2
41840 without copying. That is, all register classes that can hold MODE2
41841 can also hold MODE1. */
41843 bool
41844 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41846 if (mode1 == mode2)
41847 return true;
41849 if (ix86_tieable_integer_mode_p (mode1)
41850 && ix86_tieable_integer_mode_p (mode2))
41851 return true;
41853 /* MODE2 being XFmode implies fp stack or general regs, which means we
41854 can tie any smaller floating point modes to it. Note that we do not
41855 tie this with TFmode. */
41856 if (mode2 == XFmode)
41857 return mode1 == SFmode || mode1 == DFmode;
41859 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41860 that we can tie it with SFmode. */
41861 if (mode2 == DFmode)
41862 return mode1 == SFmode;
41864 /* If MODE2 is only appropriate for an SSE register, then tie with
41865 any other mode acceptable to SSE registers. */
41866 if (GET_MODE_SIZE (mode2) == 32
41867 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41868 return (GET_MODE_SIZE (mode1) == 32
41869 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41870 if (GET_MODE_SIZE (mode2) == 16
41871 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41872 return (GET_MODE_SIZE (mode1) == 16
41873 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41875 /* If MODE2 is appropriate for an MMX register, then tie
41876 with any other mode acceptable to MMX registers. */
41877 if (GET_MODE_SIZE (mode2) == 8
41878 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41879 return (GET_MODE_SIZE (mode1) == 8
41880 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41882 return false;
41885 /* Return the cost of moving between two registers of mode MODE. */
41887 static int
41888 ix86_set_reg_reg_cost (machine_mode mode)
41890 unsigned int units = UNITS_PER_WORD;
41892 switch (GET_MODE_CLASS (mode))
41894 default:
41895 break;
41897 case MODE_CC:
41898 units = GET_MODE_SIZE (CCmode);
41899 break;
41901 case MODE_FLOAT:
41902 if ((TARGET_SSE && mode == TFmode)
41903 || (TARGET_80387 && mode == XFmode)
41904 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41905 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41906 units = GET_MODE_SIZE (mode);
41907 break;
41909 case MODE_COMPLEX_FLOAT:
41910 if ((TARGET_SSE && mode == TCmode)
41911 || (TARGET_80387 && mode == XCmode)
41912 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41913 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41914 units = GET_MODE_SIZE (mode);
41915 break;
41917 case MODE_VECTOR_INT:
41918 case MODE_VECTOR_FLOAT:
41919 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41920 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41921 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41922 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41923 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41924 units = GET_MODE_SIZE (mode);
41927 /* Return the cost of moving between two registers of mode MODE,
41928 assuming that the move will be in pieces of at most UNITS bytes. */
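/* For example, DImode on a 32-bit target moves in two word-sized pieces,
giving COSTS_N_INSNS (2). */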
41929 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41932 /* Compute a (partial) cost for rtx X. Return true if the complete
41933 cost has been computed, and false if subexpressions should be
41934 scanned. In either case, *TOTAL contains the cost result. */
41936 static bool
41937 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
41938 bool speed)
41940 rtx mask;
41941 enum rtx_code code = (enum rtx_code) code_i;
41942 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
41943 machine_mode mode = GET_MODE (x);
41944 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
41946 switch (code)
41948 case SET:
41949 if (register_operand (SET_DEST (x), VOIDmode)
41950 && reg_or_0_operand (SET_SRC (x), VOIDmode))
41952 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
41953 return true;
41955 return false;
41957 case CONST_INT:
41958 case CONST:
41959 case LABEL_REF:
41960 case SYMBOL_REF:
41961 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
41962 *total = 3;
41963 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
41964 *total = 2;
41965 else if (flag_pic && SYMBOLIC_CONST (x)
41966 && !(TARGET_64BIT
41967 && (GET_CODE (x) == LABEL_REF
41968 || (GET_CODE (x) == SYMBOL_REF
41969 && SYMBOL_REF_LOCAL_P (x)))))
41970 *total = 1;
41971 else
41972 *total = 0;
41973 return true;
41975 case CONST_WIDE_INT:
41976 *total = 0;
41977 return true;
41979 case CONST_DOUBLE:
41980 switch (standard_80387_constant_p (x))
41982 case 1: /* 0.0 */
41983 *total = 1;
41984 return true;
41985 default: /* Other constants */
41986 *total = 2;
41987 return true;
41988 case 0:
41989 case -1:
41990 break;
41992 if (SSE_FLOAT_MODE_P (mode))
41994 case CONST_VECTOR:
41995 switch (standard_sse_constant_p (x))
41997 case 0:
41998 break;
41999 case 1: /* 0: xor eliminates false dependency */
42000 *total = 0;
42001 return true;
42002 default: /* -1: cmp contains false dependency */
42003 *total = 1;
42004 return true;
42007 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42008 it'll probably end up. Add a penalty for size. */
42009 *total = (COSTS_N_INSNS (1)
42010 + (flag_pic != 0 && !TARGET_64BIT)
42011 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42012 return true;
42014 case ZERO_EXTEND:
42015 /* Zero extension is often completely free on x86_64, so make
42016 it as cheap as possible. */
42017 if (TARGET_64BIT && mode == DImode
42018 && GET_MODE (XEXP (x, 0)) == SImode)
42019 *total = 1;
42020 else if (TARGET_ZERO_EXTEND_WITH_AND)
42021 *total = cost->add;
42022 else
42023 *total = cost->movzx;
42024 return false;
42026 case SIGN_EXTEND:
42027 *total = cost->movsx;
42028 return false;
42030 case ASHIFT:
42031 if (SCALAR_INT_MODE_P (mode)
42032 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42033 && CONST_INT_P (XEXP (x, 1)))
42035 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
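/* A left shift by 1 is as cheap as an add; shifts by 2 or 3 can be done
with an lea using scale 4 or 8. */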
42036 if (value == 1)
42038 *total = cost->add;
42039 return false;
42041 if ((value == 2 || value == 3)
42042 && cost->lea <= cost->shift_const)
42044 *total = cost->lea;
42045 return false;
42048 /* FALLTHRU */
42050 case ROTATE:
42051 case ASHIFTRT:
42052 case LSHIFTRT:
42053 case ROTATERT:
42054 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42056 /* ??? Should be SSE vector operation cost. */
42057 /* At least for published AMD latencies, this really is the same
42058 as the latency for a simple fpu operation like fabs. */
42059 /* V*QImode is emulated with 1-11 insns. */
42060 if (mode == V16QImode || mode == V32QImode)
42062 int count = 11;
42063 if (TARGET_XOP && mode == V16QImode)
42065 /* For XOP we use vpshab, which requires a broadcast of the
42066 value to the variable shift insn. For constants this
42067 means a V16Q const in mem; even when we can perform the
42068 shift with one insn set the cost to prefer paddb. */
42069 if (CONSTANT_P (XEXP (x, 1)))
42071 *total = (cost->fabs
42072 + rtx_cost (XEXP (x, 0), code, 0, speed)
42073 + (speed ? 2 : COSTS_N_BYTES (16)));
42074 return true;
42076 count = 3;
42078 else if (TARGET_SSSE3)
42079 count = 7;
42080 *total = cost->fabs * count;
42082 else
42083 *total = cost->fabs;
42085 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42087 if (CONST_INT_P (XEXP (x, 1)))
42089 if (INTVAL (XEXP (x, 1)) > 32)
42090 *total = cost->shift_const + COSTS_N_INSNS (2);
42091 else
42092 *total = cost->shift_const * 2;
42094 else
42096 if (GET_CODE (XEXP (x, 1)) == AND)
42097 *total = cost->shift_var * 2;
42098 else
42099 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42102 else
42104 if (CONST_INT_P (XEXP (x, 1)))
42105 *total = cost->shift_const;
42106 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42107 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42109 /* Return the cost after shift-and truncation. */
42110 *total = cost->shift_var;
42111 return true;
42113 else
42114 *total = cost->shift_var;
42116 return false;
42118 case FMA:
42120 rtx sub;
42122 gcc_assert (FLOAT_MODE_P (mode));
42123 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42125 /* ??? SSE scalar/vector cost should be used here. */
42126 /* ??? Bald assumption that fma has the same cost as fmul. */
42127 *total = cost->fmul;
42128 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42130 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42131 sub = XEXP (x, 0);
42132 if (GET_CODE (sub) == NEG)
42133 sub = XEXP (sub, 0);
42134 *total += rtx_cost (sub, FMA, 0, speed);
42136 sub = XEXP (x, 2);
42137 if (GET_CODE (sub) == NEG)
42138 sub = XEXP (sub, 0);
42139 *total += rtx_cost (sub, FMA, 2, speed);
42140 return true;
42143 case MULT:
42144 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42146 /* ??? SSE scalar cost should be used here. */
42147 *total = cost->fmul;
42148 return false;
42150 else if (X87_FLOAT_MODE_P (mode))
42152 *total = cost->fmul;
42153 return false;
42155 else if (FLOAT_MODE_P (mode))
42157 /* ??? SSE vector cost should be used here. */
42158 *total = cost->fmul;
42159 return false;
42161 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42163 /* V*QImode is emulated with 7-13 insns. */
42164 if (mode == V16QImode || mode == V32QImode)
42166 int extra = 11;
42167 if (TARGET_XOP && mode == V16QImode)
42168 extra = 5;
42169 else if (TARGET_SSSE3)
42170 extra = 6;
42171 *total = cost->fmul * 2 + cost->fabs * extra;
42173 /* V*DImode is emulated with 5-8 insns. */
42174 else if (mode == V2DImode || mode == V4DImode)
42176 if (TARGET_XOP && mode == V2DImode)
42177 *total = cost->fmul * 2 + cost->fabs * 3;
42178 else
42179 *total = cost->fmul * 3 + cost->fabs * 5;
42181 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42182 insns, including two PMULUDQ. */
42183 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42184 *total = cost->fmul * 2 + cost->fabs * 5;
42185 else
42186 *total = cost->fmul;
42187 return false;
42189 else
42191 rtx op0 = XEXP (x, 0);
42192 rtx op1 = XEXP (x, 1);
42193 int nbits;
42194 if (CONST_INT_P (XEXP (x, 1)))
42196 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42197 for (nbits = 0; value != 0; value &= value - 1)
42198 nbits++;
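/* The loop above clears the lowest set bit on each iteration, so NBITS ends
up as the population count of the constant multiplier (e.g. 3 for 0b101100);
it feeds the per-bit multiply cost below. */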
42200 else
42201 /* This is arbitrary. */
42202 nbits = 7;
42204 /* Compute costs correctly for widening multiplication. */
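/* For instance, (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
is a single widening imul, so the narrower SImode cost tables are used
rather than the DImode ones. */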
42205 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42206 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42207 == GET_MODE_SIZE (mode))
42209 int is_mulwiden = 0;
42210 machine_mode inner_mode = GET_MODE (op0);
42212 if (GET_CODE (op0) == GET_CODE (op1))
42213 is_mulwiden = 1, op1 = XEXP (op1, 0);
42214 else if (CONST_INT_P (op1))
42216 if (GET_CODE (op0) == SIGN_EXTEND)
42217 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42218 == INTVAL (op1);
42219 else
42220 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42223 if (is_mulwiden)
42224 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42227 *total = (cost->mult_init[MODE_INDEX (mode)]
42228 + nbits * cost->mult_bit
42229 + rtx_cost (op0, outer_code, opno, speed)
42230 + rtx_cost (op1, outer_code, opno, speed));
42232 return true;
42235 case DIV:
42236 case UDIV:
42237 case MOD:
42238 case UMOD:
42239 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42240 /* ??? SSE cost should be used here. */
42241 *total = cost->fdiv;
42242 else if (X87_FLOAT_MODE_P (mode))
42243 *total = cost->fdiv;
42244 else if (FLOAT_MODE_P (mode))
42245 /* ??? SSE vector cost should be used here. */
42246 *total = cost->fdiv;
42247 else
42248 *total = cost->divide[MODE_INDEX (mode)];
42249 return false;
42251 case PLUS:
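/* An address-like PLUS such as (plus (plus (mult reg 4) reg) disp) can be
done by a single lea, e.g. "lea 8(%rbx,%rax,4), %rcx", so such shapes are
costed as one lea plus the cost of their operands. */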
42252 if (GET_MODE_CLASS (mode) == MODE_INT
42253 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42255 if (GET_CODE (XEXP (x, 0)) == PLUS
42256 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42257 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42258 && CONSTANT_P (XEXP (x, 1)))
42260 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42261 if (val == 2 || val == 4 || val == 8)
42263 *total = cost->lea;
42264 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42265 outer_code, opno, speed);
42266 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42267 outer_code, opno, speed);
42268 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42269 return true;
42272 else if (GET_CODE (XEXP (x, 0)) == MULT
42273 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42275 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42276 if (val == 2 || val == 4 || val == 8)
42278 *total = cost->lea;
42279 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42280 outer_code, opno, speed);
42281 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42282 return true;
42285 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42287 *total = cost->lea;
42288 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42289 outer_code, opno, speed);
42290 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42291 outer_code, opno, speed);
42292 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42293 return true;
42296 /* FALLTHRU */
42298 case MINUS:
42299 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42301 /* ??? SSE cost should be used here. */
42302 *total = cost->fadd;
42303 return false;
42305 else if (X87_FLOAT_MODE_P (mode))
42307 *total = cost->fadd;
42308 return false;
42310 else if (FLOAT_MODE_P (mode))
42312 /* ??? SSE vector cost should be used here. */
42313 *total = cost->fadd;
42314 return false;
42316 /* FALLTHRU */
42318 case AND:
42319 case IOR:
42320 case XOR:
42321 if (GET_MODE_CLASS (mode) == MODE_INT
42322 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42324 *total = (cost->add * 2
42325 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42326 << (GET_MODE (XEXP (x, 0)) != DImode))
42327 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42328 << (GET_MODE (XEXP (x, 1)) != DImode)));
42329 return true;
42331 /* FALLTHRU */
42333 case NEG:
42334 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42336 /* ??? SSE cost should be used here. */
42337 *total = cost->fchs;
42338 return false;
42340 else if (X87_FLOAT_MODE_P (mode))
42342 *total = cost->fchs;
42343 return false;
42345 else if (FLOAT_MODE_P (mode))
42347 /* ??? SSE vector cost should be used here. */
42348 *total = cost->fchs;
42349 return false;
42351 /* FALLTHRU */
42353 case NOT:
42354 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42356 /* ??? Should be SSE vector operation cost. */
42357 /* At least for published AMD latencies, this really is the same
42358 as the latency for a simple fpu operation like fabs. */
42359 *total = cost->fabs;
42361 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42362 *total = cost->add * 2;
42363 else
42364 *total = cost->add;
42365 return false;
42367 case COMPARE:
42368 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42369 && XEXP (XEXP (x, 0), 1) == const1_rtx
42370 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42371 && XEXP (x, 1) == const0_rtx)
42373 /* This kind of construct is implemented using test[bwl].
42374 Treat it as if we had an AND. */
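/* E.g. (compare (zero_extract (reg) (const_int 1) (const_int 3)) (const_int 0))
tests bit 3 and is emitted as something like "testb $8, %al". */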
42375 *total = (cost->add
42376 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42377 + rtx_cost (const1_rtx, outer_code, opno, speed));
42378 return true;
42380 return false;
42382 case FLOAT_EXTEND:
42383 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42384 *total = 0;
42385 return false;
42387 case ABS:
42388 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42389 /* ??? SSE cost should be used here. */
42390 *total = cost->fabs;
42391 else if (X87_FLOAT_MODE_P (mode))
42392 *total = cost->fabs;
42393 else if (FLOAT_MODE_P (mode))
42394 /* ??? SSE vector cost should be used here. */
42395 *total = cost->fabs;
42396 return false;
42398 case SQRT:
42399 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42400 /* ??? SSE cost should be used here. */
42401 *total = cost->fsqrt;
42402 else if (X87_FLOAT_MODE_P (mode))
42403 *total = cost->fsqrt;
42404 else if (FLOAT_MODE_P (mode))
42405 /* ??? SSE vector cost should be used here. */
42406 *total = cost->fsqrt;
42407 return false;
42409 case UNSPEC:
42410 if (XINT (x, 1) == UNSPEC_TP)
42411 *total = 0;
42412 return false;
42414 case VEC_SELECT:
42415 case VEC_CONCAT:
42416 case VEC_DUPLICATE:
42417 /* ??? Assume all of these vector manipulation patterns are
42418 recognizable. In which case they all pretty much have the
42419 same cost. */
42420 *total = cost->fabs;
42421 return true;
42422 case VEC_MERGE:
42423 mask = XEXP (x, 2);
42424 /* This is a masked instruction; assume the same cost
42425 as the nonmasked variant. */
42426 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42427 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42428 else
42429 *total = cost->fabs;
42430 return true;
42432 default:
42433 return false;
42437 #if TARGET_MACHO
42439 static int current_machopic_label_num;
42441 /* Given a symbol name and its associated stub, write out the
42442 definition of the stub. */
42444 void
42445 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42447 unsigned int length;
42448 char *binder_name, *symbol_name, lazy_ptr_name[32];
42449 int label = ++current_machopic_label_num;
42451 /* For 64-bit we shouldn't get here. */
42452 gcc_assert (!TARGET_64BIT);
42454 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42455 symb = targetm.strip_name_encoding (symb);
42457 length = strlen (stub);
42458 binder_name = XALLOCAVEC (char, length + 32);
42459 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42461 length = strlen (symb);
42462 symbol_name = XALLOCAVEC (char, length + 32);
42463 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42465 sprintf (lazy_ptr_name, "L%d$lz", label);
42467 if (MACHOPIC_ATT_STUB)
42468 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42469 else if (MACHOPIC_PURE)
42470 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42471 else
42472 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42474 fprintf (file, "%s:\n", stub);
42475 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42477 if (MACHOPIC_ATT_STUB)
42479 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42481 else if (MACHOPIC_PURE)
42483 /* PIC stub. */
42484 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42485 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42486 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42487 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42488 label, lazy_ptr_name, label);
42489 fprintf (file, "\tjmp\t*%%ecx\n");
42491 else
42492 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42494 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42495 it needs no stub-binding-helper. */
42496 if (MACHOPIC_ATT_STUB)
42497 return;
42499 fprintf (file, "%s:\n", binder_name);
42501 if (MACHOPIC_PURE)
42503 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42504 fprintf (file, "\tpushl\t%%ecx\n");
42506 else
42507 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42509 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42511 /* N.B. Keep the correspondence of these
42512 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42513 old-pic/new-pic/non-pic stubs; altering this will break
42514 compatibility with existing dylibs. */
42515 if (MACHOPIC_PURE)
42517 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42518 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42520 else
42521 /* 16-byte -mdynamic-no-pic stub. */
42522 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42524 fprintf (file, "%s:\n", lazy_ptr_name);
42525 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42526 fprintf (file, ASM_LONG "%s\n", binder_name);
42528 #endif /* TARGET_MACHO */
42530 /* Order the registers for register allocator. */
42532 void
42533 x86_order_regs_for_local_alloc (void)
42535 int pos = 0;
42536 int i;
42538 /* First allocate the local general purpose registers. */
42539 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42540 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42541 reg_alloc_order [pos++] = i;
42543 /* Global general purpose registers. */
42544 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42545 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42546 reg_alloc_order [pos++] = i;
42548 /* x87 registers come first in case we are doing FP math
42549 using them. */
42550 if (!TARGET_SSE_MATH)
42551 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42552 reg_alloc_order [pos++] = i;
42554 /* SSE registers. */
42555 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42556 reg_alloc_order [pos++] = i;
42557 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42558 reg_alloc_order [pos++] = i;
42560 /* Extended REX SSE registers. */
42561 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42562 reg_alloc_order [pos++] = i;
42564 /* Mask register. */
42565 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42566 reg_alloc_order [pos++] = i;
42568 /* MPX bound registers. */
42569 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42570 reg_alloc_order [pos++] = i;
42572 /* x87 registers. */
42573 if (TARGET_SSE_MATH)
42574 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42575 reg_alloc_order [pos++] = i;
42577 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42578 reg_alloc_order [pos++] = i;
42580 /* Initialize the rest of the array, as we do not allocate some registers
42581 at all. */
42582 while (pos < FIRST_PSEUDO_REGISTER)
42583 reg_alloc_order [pos++] = 0;
42586 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42587 in struct attribute_spec.handler. */
42588 static tree
42589 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42590 tree args,
42591 int,
42592 bool *no_add_attrs)
42594 if (TREE_CODE (*node) != FUNCTION_TYPE
42595 && TREE_CODE (*node) != METHOD_TYPE
42596 && TREE_CODE (*node) != FIELD_DECL
42597 && TREE_CODE (*node) != TYPE_DECL)
42599 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42600 name);
42601 *no_add_attrs = true;
42602 return NULL_TREE;
42604 if (TARGET_64BIT)
42606 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42607 name);
42608 *no_add_attrs = true;
42609 return NULL_TREE;
42611 if (is_attribute_p ("callee_pop_aggregate_return", name))
42613 tree cst;
42615 cst = TREE_VALUE (args);
42616 if (TREE_CODE (cst) != INTEGER_CST)
42618 warning (OPT_Wattributes,
42619 "%qE attribute requires an integer constant argument",
42620 name);
42621 *no_add_attrs = true;
42623 else if (compare_tree_int (cst, 0) != 0
42624 && compare_tree_int (cst, 1) != 0)
42626 warning (OPT_Wattributes,
42627 "argument to %qE attribute is neither zero, nor one",
42628 name);
42629 *no_add_attrs = true;
42632 return NULL_TREE;
42635 return NULL_TREE;
42638 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42639 struct attribute_spec.handler. */
42640 static tree
42641 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42642 bool *no_add_attrs)
42644 if (TREE_CODE (*node) != FUNCTION_TYPE
42645 && TREE_CODE (*node) != METHOD_TYPE
42646 && TREE_CODE (*node) != FIELD_DECL
42647 && TREE_CODE (*node) != TYPE_DECL)
42649 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42650 name);
42651 *no_add_attrs = true;
42652 return NULL_TREE;
42655 /* Can combine regparm with all attributes but fastcall. */
42656 if (is_attribute_p ("ms_abi", name))
42658 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42660 error ("ms_abi and sysv_abi attributes are not compatible");
42663 return NULL_TREE;
42665 else if (is_attribute_p ("sysv_abi", name))
42667 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42669 error ("ms_abi and sysv_abi attributes are not compatible");
42672 return NULL_TREE;
42675 return NULL_TREE;
42678 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42679 struct attribute_spec.handler. */
42680 static tree
42681 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42682 bool *no_add_attrs)
42684 tree *type = NULL;
42685 if (DECL_P (*node))
42687 if (TREE_CODE (*node) == TYPE_DECL)
42688 type = &TREE_TYPE (*node);
42690 else
42691 type = node;
42693 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42695 warning (OPT_Wattributes, "%qE attribute ignored",
42696 name);
42697 *no_add_attrs = true;
42700 else if ((is_attribute_p ("ms_struct", name)
42701 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42702 || ((is_attribute_p ("gcc_struct", name)
42703 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42705 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42706 name);
42707 *no_add_attrs = true;
42710 return NULL_TREE;
42713 static tree
42714 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42715 bool *no_add_attrs)
42717 if (TREE_CODE (*node) != FUNCTION_DECL)
42719 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42720 name);
42721 *no_add_attrs = true;
42723 return NULL_TREE;
42726 static bool
42727 ix86_ms_bitfield_layout_p (const_tree record_type)
42729 return ((TARGET_MS_BITFIELD_LAYOUT
42730 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42731 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42734 /* Returns an expression indicating where the this parameter is
42735 located on entry to the FUNCTION. */
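/* Roughly: on 64-bit targets this is the first integer argument register of
the function's ABI, or the second one when a hidden aggregate-return pointer
occupies the first; on 32-bit targets it is a register for
regparm/fastcall/thiscall functions and a stack slot above the return
address otherwise. */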
42737 static rtx
42738 x86_this_parameter (tree function)
42740 tree type = TREE_TYPE (function);
42741 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42742 int nregs;
42744 if (TARGET_64BIT)
42746 const int *parm_regs;
42748 if (ix86_function_type_abi (type) == MS_ABI)
42749 parm_regs = x86_64_ms_abi_int_parameter_registers;
42750 else
42751 parm_regs = x86_64_int_parameter_registers;
42752 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42755 nregs = ix86_function_regparm (type, function);
42757 if (nregs > 0 && !stdarg_p (type))
42759 int regno;
42760 unsigned int ccvt = ix86_get_callcvt (type);
42762 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42763 regno = aggr ? DX_REG : CX_REG;
42764 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42766 regno = CX_REG;
42767 if (aggr)
42768 return gen_rtx_MEM (SImode,
42769 plus_constant (Pmode, stack_pointer_rtx, 4));
42771 else
42773 regno = AX_REG;
42774 if (aggr)
42776 regno = DX_REG;
42777 if (nregs == 1)
42778 return gen_rtx_MEM (SImode,
42779 plus_constant (Pmode,
42780 stack_pointer_rtx, 4));
42783 return gen_rtx_REG (SImode, regno);
42786 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42787 aggr ? 8 : 4));
42790 /* Determine whether x86_output_mi_thunk can succeed. */
42792 static bool
42793 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42794 const_tree function)
42796 /* 64-bit can handle anything. */
42797 if (TARGET_64BIT)
42798 return true;
42800 /* For 32-bit, everything's fine if we have one free register. */
42801 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42802 return true;
42804 /* Need a free register for vcall_offset. */
42805 if (vcall_offset)
42806 return false;
42808 /* Need a free register for GOT references. */
42809 if (flag_pic && !targetm.binds_local_p (function))
42810 return false;
42812 /* Otherwise ok. */
42813 return true;
42816 /* Output the assembler code for a thunk function. THUNK_DECL is the
42817 declaration for the thunk function itself, FUNCTION is the decl for
42818 the target function. DELTA is an immediate constant offset to be
42819 added to THIS. If VCALL_OFFSET is nonzero, the word at
42820 *(*this + vcall_offset) should be added to THIS. */
42822 static void
42823 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42824 HOST_WIDE_INT vcall_offset, tree function)
42826 rtx this_param = x86_this_parameter (function);
42827 rtx this_reg, tmp, fnaddr;
42828 unsigned int tmp_regno;
42829 rtx_insn *insn;
42831 if (TARGET_64BIT)
42832 tmp_regno = R10_REG;
42833 else
42835 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42836 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42837 tmp_regno = AX_REG;
42838 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42839 tmp_regno = DX_REG;
42840 else
42841 tmp_regno = CX_REG;
42844 emit_note (NOTE_INSN_PROLOGUE_END);
42846 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42847 pull it in now and let DELTA benefit. */
42848 if (REG_P (this_param))
42849 this_reg = this_param;
42850 else if (vcall_offset)
42852 /* Put the this parameter into %eax. */
42853 this_reg = gen_rtx_REG (Pmode, AX_REG);
42854 emit_move_insn (this_reg, this_param);
42856 else
42857 this_reg = NULL_RTX;
42859 /* Adjust the this parameter by a fixed constant. */
42860 if (delta)
42862 rtx delta_rtx = GEN_INT (delta);
42863 rtx delta_dst = this_reg ? this_reg : this_param;
42865 if (TARGET_64BIT)
42867 if (!x86_64_general_operand (delta_rtx, Pmode))
42869 tmp = gen_rtx_REG (Pmode, tmp_regno);
42870 emit_move_insn (tmp, delta_rtx);
42871 delta_rtx = tmp;
42875 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42878 /* Adjust the this parameter by a value stored in the vtable. */
42879 if (vcall_offset)
42881 rtx vcall_addr, vcall_mem, this_mem;
42883 tmp = gen_rtx_REG (Pmode, tmp_regno);
42885 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42886 if (Pmode != ptr_mode)
42887 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42888 emit_move_insn (tmp, this_mem);
42890 /* Adjust the this parameter. */
42891 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42892 if (TARGET_64BIT
42893 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42895 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42896 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42897 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42900 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42901 if (Pmode != ptr_mode)
42902 emit_insn (gen_addsi_1_zext (this_reg,
42903 gen_rtx_REG (ptr_mode,
42904 REGNO (this_reg)),
42905 vcall_mem));
42906 else
42907 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42910 /* If necessary, drop THIS back to its stack slot. */
42911 if (this_reg && this_reg != this_param)
42912 emit_move_insn (this_param, this_reg);
42914 fnaddr = XEXP (DECL_RTL (function), 0);
42915 if (TARGET_64BIT)
42917 if (!flag_pic || targetm.binds_local_p (function)
42918 || TARGET_PECOFF)
42920 else
42922 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42923 tmp = gen_rtx_CONST (Pmode, tmp);
42924 fnaddr = gen_const_mem (Pmode, tmp);
42927 else
42929 if (!flag_pic || targetm.binds_local_p (function))
42931 #if TARGET_MACHO
42932 else if (TARGET_MACHO)
42934 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
42935 fnaddr = XEXP (fnaddr, 0);
42937 #endif /* TARGET_MACHO */
42938 else
42940 tmp = gen_rtx_REG (Pmode, CX_REG);
42941 output_set_got (tmp, NULL_RTX);
42943 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
42944 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
42945 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
42946 fnaddr = gen_const_mem (Pmode, fnaddr);
42950 /* Our sibling call patterns do not allow memories, because we have no
42951 predicate that can distinguish between frame and non-frame memory.
42952 For our purposes here, we can get away with (ab)using a jump pattern,
42953 because we're going to do no optimization. */
42954 if (MEM_P (fnaddr))
42956 if (sibcall_insn_operand (fnaddr, word_mode))
42958 fnaddr = XEXP (DECL_RTL (function), 0);
42959 tmp = gen_rtx_MEM (QImode, fnaddr);
42960 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42961 tmp = emit_call_insn (tmp);
42962 SIBLING_CALL_P (tmp) = 1;
42964 else
42965 emit_jump_insn (gen_indirect_jump (fnaddr));
42967 else
42969 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
42971 // CM_LARGE_PIC always uses a pseudo PIC register, which is
42972 // uninitialized. Since FUNCTION is local and calling it
42973 // doesn't go through the PLT, we use scratch register %r11 as
42974 // the PIC register and initialize it here.
42975 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
42976 ix86_init_large_pic_reg (tmp_regno);
42977 fnaddr = legitimize_pic_address (fnaddr,
42978 gen_rtx_REG (Pmode, tmp_regno));
42981 if (!sibcall_insn_operand (fnaddr, word_mode))
42983 tmp = gen_rtx_REG (word_mode, tmp_regno);
42984 if (GET_MODE (fnaddr) != word_mode)
42985 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
42986 emit_move_insn (tmp, fnaddr);
42987 fnaddr = tmp;
42990 tmp = gen_rtx_MEM (QImode, fnaddr);
42991 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
42992 tmp = emit_call_insn (tmp);
42993 SIBLING_CALL_P (tmp) = 1;
42995 emit_barrier ();
42997 /* Emit just enough of rest_of_compilation to get the insns emitted.
42998 Note that use_thunk calls assemble_start_function et al. */
42999 insn = get_insns ();
43000 shorten_branches (insn);
43001 final_start_function (insn, file, 1);
43002 final (insn, file, 1);
43003 final_end_function ();
43006 static void
43007 x86_file_start (void)
43009 default_file_start ();
43010 if (TARGET_16BIT)
43011 fputs ("\t.code16gcc\n", asm_out_file);
43012 #if TARGET_MACHO
43013 darwin_file_start ();
43014 #endif
43015 if (X86_FILE_START_VERSION_DIRECTIVE)
43016 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43017 if (X86_FILE_START_FLTUSED)
43018 fputs ("\t.global\t__fltused\n", asm_out_file);
43019 if (ix86_asm_dialect == ASM_INTEL)
43020 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43024 x86_field_alignment (tree field, int computed)
43026 machine_mode mode;
43027 tree type = TREE_TYPE (field);
43029 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43030 return computed;
43031 mode = TYPE_MODE (strip_array_types (type));
43032 if (mode == DFmode || mode == DCmode
43033 || GET_MODE_CLASS (mode) == MODE_INT
43034 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43035 return MIN (32, computed);
43036 return computed;
43039 /* Print call to TARGET to FILE. */
43041 static void
43042 x86_print_call_or_nop (FILE *file, const char *target)
43044 if (flag_nop_mcount)
43045 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43046 else
43047 fprintf (file, "1:\tcall\t%s\n", target);
43050 /* Output assembler code to FILE to increment profiler label # LABELNO
43051 for profiling a function entry. */
43052 void
43053 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43055 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43056 : MCOUNT_NAME);
43057 if (TARGET_64BIT)
43059 #ifndef NO_PROFILE_COUNTERS
43060 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43061 #endif
43063 if (!TARGET_PECOFF && flag_pic)
43064 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43065 else
43066 x86_print_call_or_nop (file, mcount_name);
43068 else if (flag_pic)
43070 #ifndef NO_PROFILE_COUNTERS
43071 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43072 LPREFIX, labelno);
43073 #endif
43074 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43076 else
43078 #ifndef NO_PROFILE_COUNTERS
43079 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43080 LPREFIX, labelno);
43081 #endif
43082 x86_print_call_or_nop (file, mcount_name);
43085 if (flag_record_mcount)
43087 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43088 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43089 fprintf (file, "\t.previous\n");
43093 /* We don't have exact information about the insn sizes, but we may assume
43094 quite safely that we are informed about all 1-byte insns and memory
43095 address sizes. This is enough to eliminate unnecessary padding in
43096 99% of cases. */
43098 static int
43099 min_insn_size (rtx_insn *insn)
43101 int l = 0, len;
43103 if (!INSN_P (insn) || !active_insn_p (insn))
43104 return 0;
43106 /* Discard alignments we've emitted and jump instructions. */
43107 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43108 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43109 return 0;
43111 /* Important case: calls are always 5 bytes.
43112 It is common to have many calls in a row. */
43113 if (CALL_P (insn)
43114 && symbolic_reference_mentioned_p (PATTERN (insn))
43115 && !SIBLING_CALL_P (insn))
43116 return 5;
43117 len = get_attr_length (insn);
43118 if (len <= 1)
43119 return 1;
43121 /* For normal instructions we rely on get_attr_length being exact,
43122 with a few exceptions. */
43123 if (!JUMP_P (insn))
43125 enum attr_type type = get_attr_type (insn);
43127 switch (type)
43129 case TYPE_MULTI:
43130 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43131 || asm_noperands (PATTERN (insn)) >= 0)
43132 return 0;
43133 break;
43134 case TYPE_OTHER:
43135 case TYPE_FCMP:
43136 break;
43137 default:
43138 /* Otherwise trust get_attr_length. */
43139 return len;
43142 l = get_attr_length_address (insn);
43143 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43144 l = 4;
43146 if (l)
43147 return 1+l;
43148 else
43149 return 2;
43152 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43154 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43155 16-byte window. */
43157 static void
43158 ix86_avoid_jump_mispredicts (void)
43160 rtx_insn *insn, *start = get_insns ();
43161 int nbytes = 0, njumps = 0;
43162 bool isjump = false;
43164 /* Look for all minimal intervals of instructions containing 4 jumps.
43165 The intervals are bounded by START and INSN. NBYTES is the total
43166 size of instructions in the interval including INSN and not including
43167 START. When NBYTES is smaller than 16 bytes, it is possible
43168 that the end of START and INSN end up in the same 16-byte page.
43170 The smallest offset in the page at which INSN can start is the case where
43171 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43172 We add a p2align to the 16-byte window with max-skip 15 - NBYTES + sizeof (INSN).
43174 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
43175 have to, since control transfer to its label(s) can happen through other
43176 means; also, we estimate the minimum length of all asm stmts as 0. */
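/* Roughly: whenever a fourth jump would land within 16 bytes of three
earlier ones, a pad insn is emitted in front of it so that all four cannot
share one 16-byte block. */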
43177 for (insn = start; insn; insn = NEXT_INSN (insn))
43179 int min_size;
43181 if (LABEL_P (insn))
43183 int align = label_to_alignment (insn);
43184 int max_skip = label_to_max_skip (insn);
43186 if (max_skip > 15)
43187 max_skip = 15;
43188 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43189 already in the current 16 byte page, because otherwise
43190 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43191 bytes to reach 16 byte boundary. */
43192 if (align <= 0
43193 || (align <= 3 && max_skip != (1 << align) - 1))
43194 max_skip = 0;
43195 if (dump_file)
43196 fprintf (dump_file, "Label %i with max_skip %i\n",
43197 INSN_UID (insn), max_skip);
43198 if (max_skip)
43200 while (nbytes + max_skip >= 16)
43202 start = NEXT_INSN (start);
43203 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43204 || CALL_P (start))
43205 njumps--, isjump = true;
43206 else
43207 isjump = false;
43208 nbytes -= min_insn_size (start);
43211 continue;
43214 min_size = min_insn_size (insn);
43215 nbytes += min_size;
43216 if (dump_file)
43217 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43218 INSN_UID (insn), min_size);
43219 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43220 || CALL_P (insn))
43221 njumps++;
43222 else
43223 continue;
43225 while (njumps > 3)
43227 start = NEXT_INSN (start);
43228 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43229 || CALL_P (start))
43230 njumps--, isjump = true;
43231 else
43232 isjump = false;
43233 nbytes -= min_insn_size (start);
43235 gcc_assert (njumps >= 0);
43236 if (dump_file)
43237 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43238 INSN_UID (start), INSN_UID (insn), nbytes);
43240 if (njumps == 3 && isjump && nbytes < 16)
43242 int padsize = 15 - nbytes + min_insn_size (insn);
43244 if (dump_file)
43245 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43246 INSN_UID (insn), padsize);
43247 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43251 #endif
43253 /* AMD Athlon works faster
43254 when RET is not the destination of a conditional jump or directly preceded
43255 by another jump instruction. We avoid the penalty by inserting a NOP just
43256 before the RET instruction in such cases. */
43257 static void
43258 ix86_pad_returns (void)
43260 edge e;
43261 edge_iterator ei;
43263 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43265 basic_block bb = e->src;
43266 rtx_insn *ret = BB_END (bb);
43267 rtx_insn *prev;
43268 bool replace = false;
43270 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43271 || optimize_bb_for_size_p (bb))
43272 continue;
43273 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43274 if (active_insn_p (prev) || LABEL_P (prev))
43275 break;
43276 if (prev && LABEL_P (prev))
43278 edge e;
43279 edge_iterator ei;
43281 FOR_EACH_EDGE (e, ei, bb->preds)
43282 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43283 && !(e->flags & EDGE_FALLTHRU))
43285 replace = true;
43286 break;
43289 if (!replace)
43291 prev = prev_active_insn (ret);
43292 if (prev
43293 && ((JUMP_P (prev) && any_condjump_p (prev))
43294 || CALL_P (prev)))
43295 replace = true;
43296 /* Empty functions get a branch mispredict even when
43297 the jump destination is not visible to us. */
43298 if (!prev && !optimize_function_for_size_p (cfun))
43299 replace = true;
43301 if (replace)
43303 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43304 delete_insn (ret);
43309 /* Count the minimum number of instructions in BB. Return 4 if the
43310 number of instructions >= 4. */
43312 static int
43313 ix86_count_insn_bb (basic_block bb)
43315 rtx_insn *insn;
43316 int insn_count = 0;
43318 /* Count number of instructions in this block. Return 4 if the number
43319 of instructions >= 4. */
43320 FOR_BB_INSNS (bb, insn)
43322 /* Only happen in exit blocks. */
43323 if (JUMP_P (insn)
43324 && ANY_RETURN_P (PATTERN (insn)))
43325 break;
43327 if (NONDEBUG_INSN_P (insn)
43328 && GET_CODE (PATTERN (insn)) != USE
43329 && GET_CODE (PATTERN (insn)) != CLOBBER)
43331 insn_count++;
43332 if (insn_count >= 4)
43333 return insn_count;
43337 return insn_count;
43341 /* Count the minimum number of instructions in code path in BB.
43342 Return 4 if the number of instructions >= 4. */
43344 static int
43345 ix86_count_insn (basic_block bb)
43347 edge e;
43348 edge_iterator ei;
43349 int min_prev_count;
43351 /* Only bother counting instructions along paths with no
43352 more than 2 basic blocks between entry and exit. Given
43353 that BB has an edge to exit, determine if a predecessor
43354 of BB has an edge from entry. If so, compute the number
43355 of instructions in the predecessor block. If there
43356 happen to be multiple such blocks, compute the minimum. */
43357 min_prev_count = 4;
43358 FOR_EACH_EDGE (e, ei, bb->preds)
43360 edge prev_e;
43361 edge_iterator prev_ei;
43363 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43365 min_prev_count = 0;
43366 break;
43368 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43370 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43372 int count = ix86_count_insn_bb (e->src);
43373 if (count < min_prev_count)
43374 min_prev_count = count;
43375 break;
43380 if (min_prev_count < 4)
43381 min_prev_count += ix86_count_insn_bb (bb);
43383 return min_prev_count;
43386 /* Pad short function to 4 instructions. */
43388 static void
43389 ix86_pad_short_function (void)
43391 edge e;
43392 edge_iterator ei;
43394 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43396 rtx_insn *ret = BB_END (e->src);
43397 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43399 int insn_count = ix86_count_insn (e->src);
43401 /* Pad short function. */
43402 if (insn_count < 4)
43404 rtx_insn *insn = ret;
43406 /* Find epilogue. */
43407 while (insn
43408 && (!NOTE_P (insn)
43409 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43410 insn = PREV_INSN (insn);
43412 if (!insn)
43413 insn = ret;
43415 /* Two NOPs count as one instruction. */
43416 insn_count = 2 * (4 - insn_count);
43417 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43423 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43424 the epilogue, the Windows system unwinder will apply epilogue logic and
43425 produce incorrect offsets. This can be avoided by adding a nop between
43426 the last insn that can throw and the first insn of the epilogue. */
43428 static void
43429 ix86_seh_fixup_eh_fallthru (void)
43431 edge e;
43432 edge_iterator ei;
43434 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43436 rtx_insn *insn, *next;
43438 /* Find the beginning of the epilogue. */
43439 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43440 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43441 break;
43442 if (insn == NULL)
43443 continue;
43445 /* We only care about preceding insns that can throw. */
43446 insn = prev_active_insn (insn);
43447 if (insn == NULL || !can_throw_internal (insn))
43448 continue;
43450 /* Do not separate calls from their debug information. */
43451 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43452 if (NOTE_P (next)
43453 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43454 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43455 insn = next;
43456 else
43457 break;
43459 emit_insn_after (gen_nops (const1_rtx), insn);
43463 /* Implement machine-specific optimizations. We implement padding of returns
43464 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43465 static void
43466 ix86_reorg (void)
43468 /* We are freeing block_for_insn in the toplev to keep compatibility
43469 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43470 compute_bb_for_insn ();
43472 if (TARGET_SEH && current_function_has_exception_handlers ())
43473 ix86_seh_fixup_eh_fallthru ();
43475 if (optimize && optimize_function_for_speed_p (cfun))
43477 if (TARGET_PAD_SHORT_FUNCTION)
43478 ix86_pad_short_function ();
43479 else if (TARGET_PAD_RETURNS)
43480 ix86_pad_returns ();
43481 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43482 if (TARGET_FOUR_JUMP_LIMIT)
43483 ix86_avoid_jump_mispredicts ();
43484 #endif
43488 /* Return nonzero when QImode register that must be represented via REX prefix
43489 is used. */
43490 bool
43491 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43493 int i;
43494 extract_insn_cached (insn);
43495 for (i = 0; i < recog_data.n_operands; i++)
43496 if (GENERAL_REG_P (recog_data.operand[i])
43497 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43498 return true;
43499 return false;
43502 /* Return true when INSN mentions register that must be encoded using REX
43503 prefix. */
43504 bool
43505 x86_extended_reg_mentioned_p (rtx insn)
43507 subrtx_iterator::array_type array;
43508 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43510 const_rtx x = *iter;
43511 if (REG_P (x)
43512 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43513 return true;
43515 return false;
43518 /* If profitable, negate (without causing overflow) integer constant
43519 of mode MODE at location LOC. Return true in this case. */
43520 bool
43521 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43523 HOST_WIDE_INT val;
43525 if (!CONST_INT_P (*loc))
43526 return false;
43528 switch (mode)
43530 case DImode:
43531 /* DImode x86_64 constants must fit in 32 bits. */
43532 gcc_assert (x86_64_immediate_operand (*loc, mode));
43534 mode = SImode;
43535 break;
43537 case SImode:
43538 case HImode:
43539 case QImode:
43540 break;
43542 default:
43543 gcc_unreachable ();
43546 /* Avoid overflows. */
43547 if (mode_signbit_p (mode, *loc))
43548 return false;
43550 val = INTVAL (*loc);
43552 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43553 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
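/* E.g. "addl $-4, %eax" becomes "subl $4, %eax". Since +128 needs a 32-bit
immediate while -128 fits in a sign-extended 8-bit one, 128 is negated as
well, whereas -128 is left alone. */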
43554 if ((val < 0 && val != -128)
43555 || val == 128)
43557 *loc = GEN_INT (-val);
43558 return true;
43561 return false;
43564 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43565 optabs would emit if we didn't have TFmode patterns. */
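/* Sketch of the expansion below: if the input is non-negative when viewed
as a signed value, a plain signed conversion is used; otherwise the input
is halved as (in >> 1) | (in & 1), keeping the low bit so rounding is not
biased, converted, and the result doubled. */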
43567 void
43568 x86_emit_floatuns (rtx operands[2])
43570 rtx_code_label *neglab, *donelab;
43571 rtx i0, i1, f0, in, out;
43572 machine_mode mode, inmode;
43574 inmode = GET_MODE (operands[1]);
43575 gcc_assert (inmode == SImode || inmode == DImode);
43577 out = operands[0];
43578 in = force_reg (inmode, operands[1]);
43579 mode = GET_MODE (out);
43580 neglab = gen_label_rtx ();
43581 donelab = gen_label_rtx ();
43582 f0 = gen_reg_rtx (mode);
43584 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43586 expand_float (out, in, 0);
43588 emit_jump_insn (gen_jump (donelab));
43589 emit_barrier ();
43591 emit_label (neglab);
43593 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43594 1, OPTAB_DIRECT);
43595 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43596 1, OPTAB_DIRECT);
43597 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43599 expand_float (f0, i0, 0);
43601 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43603 emit_label (donelab);
43606 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43607 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43608 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43609 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43611 /* Get a vector mode of the same size as the original but with elements
43612 twice as wide. This is only guaranteed to apply to integral vectors. */
43614 static inline machine_mode
43615 get_mode_wider_vector (machine_mode o)
43617 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43618 machine_mode n = GET_MODE_WIDER_MODE (o);
43619 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43620 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43621 return n;
43624 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43625 fill target with val via vec_duplicate. */
43627 static bool
43628 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43630 bool ok;
43631 rtx_insn *insn;
43632 rtx dup;
43634 /* First attempt to recognize VAL as-is. */
43635 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43636 insn = emit_insn (gen_rtx_SET (target, dup));
43637 if (recog_memoized (insn) < 0)
43639 rtx_insn *seq;
43640 /* If that fails, force VAL into a register. */
43642 start_sequence ();
43643 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43644 seq = get_insns ();
43645 end_sequence ();
43646 if (seq)
43647 emit_insn_before (seq, insn);
43649 ok = recog_memoized (insn) >= 0;
43650 gcc_assert (ok);
43652 return true;
43655 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43656 with all elements equal to VAR. Return true if successful. */
43658 static bool
43659 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43660 rtx target, rtx val)
43662 bool ok;
43664 switch (mode)
43666 case V2SImode:
43667 case V2SFmode:
43668 if (!mmx_ok)
43669 return false;
43670 /* FALLTHRU */
43672 case V4DFmode:
43673 case V4DImode:
43674 case V8SFmode:
43675 case V8SImode:
43676 case V2DFmode:
43677 case V2DImode:
43678 case V4SFmode:
43679 case V4SImode:
43680 case V16SImode:
43681 case V8DImode:
43682 case V16SFmode:
43683 case V8DFmode:
43684 return ix86_vector_duplicate_value (mode, target, val);
43686 case V4HImode:
43687 if (!mmx_ok)
43688 return false;
43689 if (TARGET_SSE || TARGET_3DNOW_A)
43691 rtx x;
43693 val = gen_lowpart (SImode, val);
43694 x = gen_rtx_TRUNCATE (HImode, val);
43695 x = gen_rtx_VEC_DUPLICATE (mode, x);
43696 emit_insn (gen_rtx_SET (target, x));
43697 return true;
43699 goto widen;
43701 case V8QImode:
43702 if (!mmx_ok)
43703 return false;
43704 goto widen;
43706 case V8HImode:
43707 if (TARGET_AVX2)
43708 return ix86_vector_duplicate_value (mode, target, val);
43710 if (TARGET_SSE2)
43712 struct expand_vec_perm_d dperm;
43713 rtx tmp1, tmp2;
43715 permute:
43716 memset (&dperm, 0, sizeof (dperm));
43717 dperm.target = target;
43718 dperm.vmode = mode;
43719 dperm.nelt = GET_MODE_NUNITS (mode);
43720 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43721 dperm.one_operand_p = true;
43723 /* Extend to SImode using a paradoxical SUBREG. */
43724 tmp1 = gen_reg_rtx (SImode);
43725 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43727 /* Insert the SImode value as low element of a V4SImode vector. */
43728 tmp2 = gen_reg_rtx (V4SImode);
43729 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43730 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43732 ok = (expand_vec_perm_1 (&dperm)
43733 || expand_vec_perm_broadcast_1 (&dperm));
43734 gcc_assert (ok);
43735 return ok;
43737 goto widen;
43739 case V16QImode:
43740 if (TARGET_AVX2)
43741 return ix86_vector_duplicate_value (mode, target, val);
43743 if (TARGET_SSE2)
43744 goto permute;
43745 goto widen;
43747 widen:
43748 /* Replicate the value once into the next wider mode and recurse. */
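/* E.g. for V8QImode the byte is widened to (val << 8) | val in HImode, the
value is duplicated across a V4HImode vector, and the result is
reinterpreted in the original mode. */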
43750 machine_mode smode, wsmode, wvmode;
43751 rtx x;
43753 smode = GET_MODE_INNER (mode);
43754 wvmode = get_mode_wider_vector (mode);
43755 wsmode = GET_MODE_INNER (wvmode);
43757 val = convert_modes (wsmode, smode, val, true);
43758 x = expand_simple_binop (wsmode, ASHIFT, val,
43759 GEN_INT (GET_MODE_BITSIZE (smode)),
43760 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43761 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43763 x = gen_reg_rtx (wvmode);
43764 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43765 gcc_assert (ok);
43766 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43767 return ok;
43770 case V16HImode:
43771 case V32QImode:
43772 if (TARGET_AVX2)
43773 return ix86_vector_duplicate_value (mode, target, val);
43774 else
43776 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43777 rtx x = gen_reg_rtx (hvmode);
43779 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43780 gcc_assert (ok);
43782 x = gen_rtx_VEC_CONCAT (mode, x, x);
43783 emit_insn (gen_rtx_SET (target, x));
43785 return true;
43787 case V64QImode:
43788 case V32HImode:
43789 if (TARGET_AVX512BW)
43790 return ix86_vector_duplicate_value (mode, target, val);
43791 else
43793 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43794 rtx x = gen_reg_rtx (hvmode);
43796 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43797 gcc_assert (ok);
43799 x = gen_rtx_VEC_CONCAT (mode, x, x);
43800 emit_insn (gen_rtx_SET (target, x));
43802 return true;
43804 default:
43805 return false;
43809 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43810 whose ONE_VAR element is VAR, and other elements are zero. Return true
43811 if successful. */
43813 static bool
43814 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43815 rtx target, rtx var, int one_var)
43817 machine_mode vsimode;
43818 rtx new_target;
43819 rtx x, tmp;
43820 bool use_vector_set = false;
43822 switch (mode)
43824 case V2DImode:
43825 /* For SSE4.1, we normally use vector set. But if the second
43826 element is zero and inter-unit moves are OK, we use movq
43827 instead. */
43828 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43829 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43830 && one_var == 0));
43831 break;
43832 case V16QImode:
43833 case V4SImode:
43834 case V4SFmode:
43835 use_vector_set = TARGET_SSE4_1;
43836 break;
43837 case V8HImode:
43838 use_vector_set = TARGET_SSE2;
43839 break;
43840 case V4HImode:
43841 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43842 break;
43843 case V32QImode:
43844 case V16HImode:
43845 case V8SImode:
43846 case V8SFmode:
43847 case V4DFmode:
43848 use_vector_set = TARGET_AVX;
43849 break;
43850 case V4DImode:
43851 /* Use ix86_expand_vector_set in 64bit mode only. */
43852 use_vector_set = TARGET_AVX && TARGET_64BIT;
43853 break;
43854 default:
43855 break;
43858 if (use_vector_set)
43860 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
43861 var = force_reg (GET_MODE_INNER (mode), var);
43862 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43863 return true;
43866 switch (mode)
43868 case V2SFmode:
43869 case V2SImode:
43870 if (!mmx_ok)
43871 return false;
43872 /* FALLTHRU */
43874 case V2DFmode:
43875 case V2DImode:
43876 if (one_var != 0)
43877 return false;
43878 var = force_reg (GET_MODE_INNER (mode), var);
43879 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43880 emit_insn (gen_rtx_SET (target, x));
43881 return true;
43883 case V4SFmode:
43884 case V4SImode:
43885 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43886 new_target = gen_reg_rtx (mode);
43887 else
43888 new_target = target;
43889 var = force_reg (GET_MODE_INNER (mode), var);
43890 x = gen_rtx_VEC_DUPLICATE (mode, var);
43891 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43892 emit_insn (gen_rtx_SET (new_target, x));
43893 if (one_var != 0)
43895 /* We need to shuffle the value to the correct position, so
43896 create a new pseudo to store the intermediate result. */
43898 /* With SSE2, we can use the integer shuffle insns. */
43899 if (mode != V4SFmode && TARGET_SSE2)
43901 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43902 const1_rtx,
43903 GEN_INT (one_var == 1 ? 0 : 1),
43904 GEN_INT (one_var == 2 ? 0 : 1),
43905 GEN_INT (one_var == 3 ? 0 : 1)));
43906 if (target != new_target)
43907 emit_move_insn (target, new_target);
43908 return true;
43911 /* Otherwise convert the intermediate result to V4SFmode and
43912 use the SSE1 shuffle instructions. */
43913 if (mode != V4SFmode)
43915 tmp = gen_reg_rtx (V4SFmode);
43916 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43918 else
43919 tmp = new_target;
43921 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43922 const1_rtx,
43923 GEN_INT (one_var == 1 ? 0 : 1),
43924 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43925 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43927 if (mode != V4SFmode)
43928 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43929 else if (tmp != target)
43930 emit_move_insn (target, tmp);
43932 else if (target != new_target)
43933 emit_move_insn (target, new_target);
43934 return true;
43936 case V8HImode:
43937 case V16QImode:
43938 vsimode = V4SImode;
43939 goto widen;
43940 case V4HImode:
43941 case V8QImode:
43942 if (!mmx_ok)
43943 return false;
43944 vsimode = V2SImode;
43945 goto widen;
43946 widen:
43947 if (one_var != 0)
43948 return false;
43950 /* Zero extend the variable element to SImode and recurse. */
43951 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
43953 x = gen_reg_rtx (vsimode);
43954 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
43955 var, one_var))
43956 gcc_unreachable ();
43958 emit_move_insn (target, gen_lowpart (mode, x));
43959 return true;
43961 default:
43962 return false;
43966 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43967 consisting of the values in VALS. It is known that all elements
43968 except ONE_VAR are constants. Return true if successful. */
43970 static bool
43971 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
43972 rtx target, rtx vals, int one_var)
43974 rtx var = XVECEXP (vals, 0, one_var);
43975 machine_mode wmode;
43976 rtx const_vec, x;
43978 const_vec = copy_rtx (vals);
43979 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
43980 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
43982 switch (mode)
43984 case V2DFmode:
43985 case V2DImode:
43986 case V2SFmode:
43987 case V2SImode:
43988 /* For the two element vectors, it's just as easy to use
43989 the general case. */
43990 return false;
43992 case V4DImode:
43993 /* Use ix86_expand_vector_set in 64bit mode only. */
43994 if (!TARGET_64BIT)
43995 return false;
43996 case V4DFmode:
43997 case V8SFmode:
43998 case V8SImode:
43999 case V16HImode:
44000 case V32QImode:
44001 case V4SFmode:
44002 case V4SImode:
44003 case V8HImode:
44004 case V4HImode:
44005 break;
44007 case V16QImode:
44008 if (TARGET_SSE4_1)
44009 break;
44010 wmode = V8HImode;
44011 goto widen;
44012 case V8QImode:
44013 wmode = V4HImode;
44014 goto widen;
44015 widen:
44016 /* There's no way to set one QImode entry easily. Combine
44017 the variable value with its adjacent constant value, and
44018 promote to an HImode set. */
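/* E.g. to set only byte 5 of a V16QImode vector, the variable byte is
merged with its constant neighbour (byte 4) into one 16-bit value and
stored with a single HImode vector-set at element 2. */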
44019 x = XVECEXP (vals, 0, one_var ^ 1);
44020 if (one_var & 1)
44022 var = convert_modes (HImode, QImode, var, true);
44023 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44024 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44025 x = GEN_INT (INTVAL (x) & 0xff);
44027 else
44029 var = convert_modes (HImode, QImode, var, true);
44030 x = gen_int_mode (INTVAL (x) << 8, HImode);
44032 if (x != const0_rtx)
44033 var = expand_simple_binop (HImode, IOR, var, x, var,
44034 1, OPTAB_LIB_WIDEN);
44036 x = gen_reg_rtx (wmode);
44037 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44038 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44040 emit_move_insn (target, gen_lowpart (mode, x));
44041 return true;
44043 default:
44044 return false;
44047 emit_move_insn (target, const_vec);
44048 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44049 return true;
44052 /* A subroutine of ix86_expand_vector_init_general. Use vector
44053 concatenate to handle the most general case: all values variable,
44054 and none identical. */
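/* The recursion below works pairwise: e.g. a V8SImode build from eight
scalars first forms four V2SImode pairs, then two V4SImode halves, then the
final V8SImode concat; inputs are processed backward to help the register
allocator (PR 36222). */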
44056 static void
44057 ix86_expand_vector_init_concat (machine_mode mode,
44058 rtx target, rtx *ops, int n)
44060 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44061 rtx first[16], second[8], third[4];
44062 rtvec v;
44063 int i, j;
44065 switch (n)
44067 case 2:
44068 switch (mode)
44070 case V16SImode:
44071 cmode = V8SImode;
44072 break;
44073 case V16SFmode:
44074 cmode = V8SFmode;
44075 break;
44076 case V8DImode:
44077 cmode = V4DImode;
44078 break;
44079 case V8DFmode:
44080 cmode = V4DFmode;
44081 break;
44082 case V8SImode:
44083 cmode = V4SImode;
44084 break;
44085 case V8SFmode:
44086 cmode = V4SFmode;
44087 break;
44088 case V4DImode:
44089 cmode = V2DImode;
44090 break;
44091 case V4DFmode:
44092 cmode = V2DFmode;
44093 break;
44094 case V4SImode:
44095 cmode = V2SImode;
44096 break;
44097 case V4SFmode:
44098 cmode = V2SFmode;
44099 break;
44100 case V2DImode:
44101 cmode = DImode;
44102 break;
44103 case V2SImode:
44104 cmode = SImode;
44105 break;
44106 case V2DFmode:
44107 cmode = DFmode;
44108 break;
44109 case V2SFmode:
44110 cmode = SFmode;
44111 break;
44112 default:
44113 gcc_unreachable ();
44116 if (!register_operand (ops[1], cmode))
44117 ops[1] = force_reg (cmode, ops[1]);
44118 if (!register_operand (ops[0], cmode))
44119 ops[0] = force_reg (cmode, ops[0]);
44120 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44121 ops[1])));
44122 break;
44124 case 4:
44125 switch (mode)
44127 case V4DImode:
44128 cmode = V2DImode;
44129 break;
44130 case V4DFmode:
44131 cmode = V2DFmode;
44132 break;
44133 case V4SImode:
44134 cmode = V2SImode;
44135 break;
44136 case V4SFmode:
44137 cmode = V2SFmode;
44138 break;
44139 default:
44140 gcc_unreachable ();
44142 goto half;
44144 case 8:
44145 switch (mode)
44147 case V8DImode:
44148 cmode = V2DImode;
44149 hmode = V4DImode;
44150 break;
44151 case V8DFmode:
44152 cmode = V2DFmode;
44153 hmode = V4DFmode;
44154 break;
44155 case V8SImode:
44156 cmode = V2SImode;
44157 hmode = V4SImode;
44158 break;
44159 case V8SFmode:
44160 cmode = V2SFmode;
44161 hmode = V4SFmode;
44162 break;
44163 default:
44164 gcc_unreachable ();
44166 goto half;
44168 case 16:
44169 switch (mode)
44171 case V16SImode:
44172 cmode = V2SImode;
44173 hmode = V4SImode;
44174 gmode = V8SImode;
44175 break;
44176 case V16SFmode:
44177 cmode = V2SFmode;
44178 hmode = V4SFmode;
44179 gmode = V8SFmode;
44180 break;
44181 default:
44182 gcc_unreachable ();
44184 goto half;
44186 half:
44187 /* FIXME: We process inputs backward to help RA. PR 36222. */
44188 i = n - 1;
44189 j = (n >> 1) - 1;
44190 for (; i > 0; i -= 2, j--)
44192 first[j] = gen_reg_rtx (cmode);
44193 v = gen_rtvec (2, ops[i - 1], ops[i]);
44194 ix86_expand_vector_init (false, first[j],
44195 gen_rtx_PARALLEL (cmode, v));
44198 n >>= 1;
44199 if (n > 4)
44201 gcc_assert (hmode != VOIDmode);
44202 gcc_assert (gmode != VOIDmode);
44203 for (i = j = 0; i < n; i += 2, j++)
44205 second[j] = gen_reg_rtx (hmode);
44206 ix86_expand_vector_init_concat (hmode, second [j],
44207 &first [i], 2);
44209 n >>= 1;
44210 for (i = j = 0; i < n; i += 2, j++)
44212 third[j] = gen_reg_rtx (gmode);
44213 ix86_expand_vector_init_concat (gmode, third[j],
44214 &second[i], 2);
44216 n >>= 1;
44217 ix86_expand_vector_init_concat (mode, target, third, n);
44219 else if (n > 2)
44221 gcc_assert (hmode != VOIDmode);
44222 for (i = j = 0; i < n; i += 2, j++)
44224 second[j] = gen_reg_rtx (hmode);
44225 ix86_expand_vector_init_concat (hmode, second [j],
44226 &first [i], 2);
44228 n >>= 1;
44229 ix86_expand_vector_init_concat (mode, target, second, n);
44231 else
44232 ix86_expand_vector_init_concat (mode, target, first, n);
44233 break;
44235 default:
44236 gcc_unreachable ();
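/* In effect the routine above builds the result pairwise: for eight
   variable elements a0..a7 it first forms the two-element vectors
   { a0,a1 } { a2,a3 } { a4,a5 } { a6,a7 }, then concatenates those into
   two four-element halves, and finally concatenates the halves into the
   full-width vector; each combining step is a VEC_CONCAT of two equally
   sized registers (the n == 2 case above).  */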
44240 /* A subroutine of ix86_expand_vector_init_general. Use vector
44241 interleave to handle the most general case: all values variable,
44242 and none identical. */
44244 static void
44245 ix86_expand_vector_init_interleave (machine_mode mode,
44246 rtx target, rtx *ops, int n)
44248 machine_mode first_imode, second_imode, third_imode, inner_mode;
44249 int i, j;
44250 rtx op0, op1;
44251 rtx (*gen_load_even) (rtx, rtx, rtx);
44252 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44253 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44255 switch (mode)
44257 case V8HImode:
44258 gen_load_even = gen_vec_setv8hi;
44259 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44260 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44261 inner_mode = HImode;
44262 first_imode = V4SImode;
44263 second_imode = V2DImode;
44264 third_imode = VOIDmode;
44265 break;
44266 case V16QImode:
44267 gen_load_even = gen_vec_setv16qi;
44268 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44269 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44270 inner_mode = QImode;
44271 first_imode = V8HImode;
44272 second_imode = V4SImode;
44273 third_imode = V2DImode;
44274 break;
44275 default:
44276 gcc_unreachable ();
44279 for (i = 0; i < n; i++)
44281 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44282 op0 = gen_reg_rtx (SImode);
44283 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44285 /* Insert the SImode value as low element of V4SImode vector. */
44286 op1 = gen_reg_rtx (V4SImode);
44287 op0 = gen_rtx_VEC_MERGE (V4SImode,
44288 gen_rtx_VEC_DUPLICATE (V4SImode,
44289 op0),
44290 CONST0_RTX (V4SImode),
44291 const1_rtx);
44292 emit_insn (gen_rtx_SET (op1, op0));
44294 /* Cast the V4SImode vector back to a vector in the original mode. */
44295 op0 = gen_reg_rtx (mode);
44296 emit_move_insn (op0, gen_lowpart (mode, op1));
44298 /* Load even elements into the second position. */
44299 emit_insn (gen_load_even (op0,
44300 force_reg (inner_mode,
44301 ops [i + i + 1]),
44302 const1_rtx));
44304 /* Cast vector to FIRST_IMODE vector. */
44305 ops[i] = gen_reg_rtx (first_imode);
44306 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44309 /* Interleave low FIRST_IMODE vectors. */
44310 for (i = j = 0; i < n; i += 2, j++)
44312 op0 = gen_reg_rtx (first_imode);
44313 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44315 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44316 ops[j] = gen_reg_rtx (second_imode);
44317 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44320 /* Interleave low SECOND_IMODE vectors. */
44321 switch (second_imode)
44323 case V4SImode:
44324 for (i = j = 0; i < n / 2; i += 2, j++)
44326 op0 = gen_reg_rtx (second_imode);
44327 emit_insn (gen_interleave_second_low (op0, ops[i],
44328 ops[i + 1]));
44330 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44331 vector. */
44332 ops[j] = gen_reg_rtx (third_imode);
44333 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44335 second_imode = V2DImode;
44336 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44337 /* FALLTHRU */
44339 case V2DImode:
44340 op0 = gen_reg_rtx (second_imode);
44341 emit_insn (gen_interleave_second_low (op0, ops[0],
44342 ops[1]));
44344 /* Cast the SECOND_IMODE vector back to a vector in the original
44345 mode. */
44346 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44347 break;
44349 default:
44350 gcc_unreachable ();
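/* A rough picture of the interleave strategy above, for a V16QImode
   vector built from bytes b0..b15: each pair (b2i, b2i+1) is first
   placed in the low two bytes of its own vector, via the SImode move
   and the vec_set of the second byte, giving eight vectors that each
   carry one useful 16-bit lane.  Interleaving the low halves as V8HI,
   then as V4SI, then as V2DI lanes repeatedly merges neighbouring
   vectors until a single vector holds all sixteen bytes in order.  */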
44354 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44355 all values variable, and none identical. */
44357 static void
44358 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44359 rtx target, rtx vals)
44361 rtx ops[64], op0, op1, op2, op3, op4, op5;
44362 machine_mode half_mode = VOIDmode;
44363 machine_mode quarter_mode = VOIDmode;
44364 int n, i;
44366 switch (mode)
44368 case V2SFmode:
44369 case V2SImode:
44370 if (!mmx_ok && !TARGET_SSE)
44371 break;
44372 /* FALLTHRU */
44374 case V16SImode:
44375 case V16SFmode:
44376 case V8DFmode:
44377 case V8DImode:
44378 case V8SFmode:
44379 case V8SImode:
44380 case V4DFmode:
44381 case V4DImode:
44382 case V4SFmode:
44383 case V4SImode:
44384 case V2DFmode:
44385 case V2DImode:
44386 n = GET_MODE_NUNITS (mode);
44387 for (i = 0; i < n; i++)
44388 ops[i] = XVECEXP (vals, 0, i);
44389 ix86_expand_vector_init_concat (mode, target, ops, n);
44390 return;
44392 case V32QImode:
44393 half_mode = V16QImode;
44394 goto half;
44396 case V16HImode:
44397 half_mode = V8HImode;
44398 goto half;
44400 half:
44401 n = GET_MODE_NUNITS (mode);
44402 for (i = 0; i < n; i++)
44403 ops[i] = XVECEXP (vals, 0, i);
44404 op0 = gen_reg_rtx (half_mode);
44405 op1 = gen_reg_rtx (half_mode);
44406 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44407 n >> 2);
44408 ix86_expand_vector_init_interleave (half_mode, op1,
44409 &ops [n >> 1], n >> 2);
44410 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44411 return;
44413 case V64QImode:
44414 quarter_mode = V16QImode;
44415 half_mode = V32QImode;
44416 goto quarter;
44418 case V32HImode:
44419 quarter_mode = V8HImode;
44420 half_mode = V16HImode;
44421 goto quarter;
44423 quarter:
44424 n = GET_MODE_NUNITS (mode);
44425 for (i = 0; i < n; i++)
44426 ops[i] = XVECEXP (vals, 0, i);
44427 op0 = gen_reg_rtx (quarter_mode);
44428 op1 = gen_reg_rtx (quarter_mode);
44429 op2 = gen_reg_rtx (quarter_mode);
44430 op3 = gen_reg_rtx (quarter_mode);
44431 op4 = gen_reg_rtx (half_mode);
44432 op5 = gen_reg_rtx (half_mode);
44433 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44434 n >> 3);
44435 ix86_expand_vector_init_interleave (quarter_mode, op1,
44436 &ops [n >> 2], n >> 3);
44437 ix86_expand_vector_init_interleave (quarter_mode, op2,
44438 &ops [n >> 1], n >> 3);
44439 ix86_expand_vector_init_interleave (quarter_mode, op3,
44440 &ops [(n >> 1) | (n >> 2)], n >> 3);
44441 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44442 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44443 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44444 return;
44446 case V16QImode:
44447 if (!TARGET_SSE4_1)
44448 break;
44449 /* FALLTHRU */
44451 case V8HImode:
44452 if (!TARGET_SSE2)
44453 break;
44455 /* Don't use ix86_expand_vector_init_interleave if we can't
44456 move from GPR to SSE register directly. */
44457 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44458 break;
44460 n = GET_MODE_NUNITS (mode);
44461 for (i = 0; i < n; i++)
44462 ops[i] = XVECEXP (vals, 0, i);
44463 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44464 return;
44466 case V4HImode:
44467 case V8QImode:
44468 break;
44470 default:
44471 gcc_unreachable ();
44475 int i, j, n_elts, n_words, n_elt_per_word;
44476 machine_mode inner_mode;
44477 rtx words[4], shift;
44479 inner_mode = GET_MODE_INNER (mode);
44480 n_elts = GET_MODE_NUNITS (mode);
44481 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44482 n_elt_per_word = n_elts / n_words;
44483 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44485 for (i = 0; i < n_words; ++i)
44487 rtx word = NULL_RTX;
44489 for (j = 0; j < n_elt_per_word; ++j)
44491 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44492 elt = convert_modes (word_mode, inner_mode, elt, true);
44494 if (j == 0)
44495 word = elt;
44496 else
44498 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44499 word, 1, OPTAB_LIB_WIDEN);
44500 word = expand_simple_binop (word_mode, IOR, word, elt,
44501 word, 1, OPTAB_LIB_WIDEN);
44505 words[i] = word;
44508 if (n_words == 1)
44509 emit_move_insn (target, gen_lowpart (mode, words[0]));
44510 else if (n_words == 2)
44512 rtx tmp = gen_reg_rtx (mode);
44513 emit_clobber (tmp);
44514 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44515 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44516 emit_move_insn (target, tmp);
44518 else if (n_words == 4)
44520 rtx tmp = gen_reg_rtx (V4SImode);
44521 gcc_assert (word_mode == SImode);
44522 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44523 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44524 emit_move_insn (target, gen_lowpart (mode, tmp));
44526 else
44527 gcc_unreachable ();
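/* The block above is the generic fallback that packs the vector through
   integer registers.  For each word_mode chunk i it computes roughly

	word = elt[(i + 1) * n_elt_per_word - 1];
	for (j = n_elt_per_word - 2; j >= 0; j--)
	  word = (word << elt_bits) | elt[i * n_elt_per_word + j];

   (with elt_bits = GET_MODE_BITSIZE (inner_mode)), which leaves the
   lowest-indexed element in the low bits, matching the little-endian
   element layout.  The finished words are then moved into the vector
   register, or handed to a V4SImode initialization when four words are
   needed.  */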
44531 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44532 instructions unless MMX_OK is true. */
44534 void
44535 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44537 machine_mode mode = GET_MODE (target);
44538 machine_mode inner_mode = GET_MODE_INNER (mode);
44539 int n_elts = GET_MODE_NUNITS (mode);
44540 int n_var = 0, one_var = -1;
44541 bool all_same = true, all_const_zero = true;
44542 int i;
44543 rtx x;
44545 for (i = 0; i < n_elts; ++i)
44547 x = XVECEXP (vals, 0, i);
44548 if (!(CONST_SCALAR_INT_P (x)
44549 || CONST_DOUBLE_P (x)
44550 || CONST_FIXED_P (x)))
44551 n_var++, one_var = i;
44552 else if (x != CONST0_RTX (inner_mode))
44553 all_const_zero = false;
44554 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44555 all_same = false;
44558 /* Constants are best loaded from the constant pool. */
44559 if (n_var == 0)
44561 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44562 return;
44565 /* If all values are identical, broadcast the value. */
44566 if (all_same
44567 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44568 XVECEXP (vals, 0, 0)))
44569 return;
44571 /* Values where only one field is non-constant are best loaded from
44572 the pool and overwritten via move later. */
44573 if (n_var == 1)
44575 if (all_const_zero
44576 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44577 XVECEXP (vals, 0, one_var),
44578 one_var))
44579 return;
44581 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44582 return;
44585 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44588 void
44589 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44591 machine_mode mode = GET_MODE (target);
44592 machine_mode inner_mode = GET_MODE_INNER (mode);
44593 machine_mode half_mode;
44594 bool use_vec_merge = false;
44595 rtx tmp;
44596 static rtx (*gen_extract[6][2]) (rtx, rtx)
44598 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44599 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44600 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44601 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44602 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44603 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44605 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44607 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44608 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44609 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44610 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44611 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44612 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44614 int i, j, n;
44616 switch (mode)
44618 case V2SFmode:
44619 case V2SImode:
44620 if (mmx_ok)
44622 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44623 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44624 if (elt == 0)
44625 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44626 else
44627 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44628 emit_insn (gen_rtx_SET (target, tmp));
44629 return;
44631 break;
44633 case V2DImode:
44634 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44635 if (use_vec_merge)
44636 break;
44638 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44639 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44640 if (elt == 0)
44641 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44642 else
44643 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44644 emit_insn (gen_rtx_SET (target, tmp));
44645 return;
44647 case V2DFmode:
44649 rtx op0, op1;
44651 /* For the two element vectors, we implement a VEC_CONCAT with
44652 the extraction of the other element. */
44654 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44655 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44657 if (elt == 0)
44658 op0 = val, op1 = tmp;
44659 else
44660 op0 = tmp, op1 = val;
44662 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44663 emit_insn (gen_rtx_SET (target, tmp));
44665 return;
44667 case V4SFmode:
44668 use_vec_merge = TARGET_SSE4_1;
44669 if (use_vec_merge)
44670 break;
44672 switch (elt)
44674 case 0:
44675 use_vec_merge = true;
44676 break;
44678 case 1:
44679 /* tmp = target = A B C D */
44680 tmp = copy_to_reg (target);
44681 /* target = A A B B */
44682 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44683 /* target = X A B B */
44684 ix86_expand_vector_set (false, target, val, 0);
44685 /* target = A X C D */
44686 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44687 const1_rtx, const0_rtx,
44688 GEN_INT (2+4), GEN_INT (3+4)));
44689 return;
44691 case 2:
44692 /* tmp = target = A B C D */
44693 tmp = copy_to_reg (target);
44694 /* tmp = X B C D */
44695 ix86_expand_vector_set (false, tmp, val, 0);
44696 /* target = A B X D */
44697 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44698 const0_rtx, const1_rtx,
44699 GEN_INT (0+4), GEN_INT (3+4)));
44700 return;
44702 case 3:
44703 /* tmp = target = A B C D */
44704 tmp = copy_to_reg (target);
44705 /* tmp = X B C D */
44706 ix86_expand_vector_set (false, tmp, val, 0);
44707 /* target = A B X D */
44708 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44709 const0_rtx, const1_rtx,
44710 GEN_INT (2+4), GEN_INT (0+4)));
44711 return;
44713 default:
44714 gcc_unreachable ();
44716 break;
44718 case V4SImode:
44719 use_vec_merge = TARGET_SSE4_1;
44720 if (use_vec_merge)
44721 break;
44723 /* Element 0 handled by vec_merge below. */
44724 if (elt == 0)
44726 use_vec_merge = true;
44727 break;
44730 if (TARGET_SSE2)
44732 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44733 store into element 0, then shuffle them back. */
44735 rtx order[4];
44737 order[0] = GEN_INT (elt);
44738 order[1] = const1_rtx;
44739 order[2] = const2_rtx;
44740 order[3] = GEN_INT (3);
44741 order[elt] = const0_rtx;
44743 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44744 order[1], order[2], order[3]));
44746 ix86_expand_vector_set (false, target, val, 0);
44748 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44749 order[1], order[2], order[3]));
44751 else
44753 /* For SSE1, we have to reuse the V4SF code. */
44754 rtx t = gen_reg_rtx (V4SFmode);
44755 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44756 emit_move_insn (target, gen_lowpart (mode, t));
44758 return;
44760 case V8HImode:
44761 use_vec_merge = TARGET_SSE2;
44762 break;
44763 case V4HImode:
44764 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44765 break;
44767 case V16QImode:
44768 use_vec_merge = TARGET_SSE4_1;
44769 break;
44771 case V8QImode:
44772 break;
44774 case V32QImode:
44775 half_mode = V16QImode;
44776 j = 0;
44777 n = 16;
44778 goto half;
44780 case V16HImode:
44781 half_mode = V8HImode;
44782 j = 1;
44783 n = 8;
44784 goto half;
44786 case V8SImode:
44787 half_mode = V4SImode;
44788 j = 2;
44789 n = 4;
44790 goto half;
44792 case V4DImode:
44793 half_mode = V2DImode;
44794 j = 3;
44795 n = 2;
44796 goto half;
44798 case V8SFmode:
44799 half_mode = V4SFmode;
44800 j = 4;
44801 n = 4;
44802 goto half;
44804 case V4DFmode:
44805 half_mode = V2DFmode;
44806 j = 5;
44807 n = 2;
44808 goto half;
44810 half:
44811 /* Compute offset. */
44812 i = elt / n;
44813 elt %= n;
44815 gcc_assert (i <= 1);
44817 /* Extract the half. */
44818 tmp = gen_reg_rtx (half_mode);
44819 emit_insn (gen_extract[j][i] (tmp, target));
44821 /* Put val in tmp at elt. */
44822 ix86_expand_vector_set (false, tmp, val, elt);
44824 /* Put it back. */
44825 emit_insn (gen_insert[j][i] (target, target, tmp));
44826 return;
44828 case V8DFmode:
44829 if (TARGET_AVX512F)
44831 tmp = gen_reg_rtx (mode);
44832 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44833 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44834 force_reg (QImode, GEN_INT (1 << elt))));
44835 return;
44837 else
44838 break;
44839 case V8DImode:
44840 if (TARGET_AVX512F)
44842 tmp = gen_reg_rtx (mode);
44843 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44844 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44845 force_reg (QImode, GEN_INT (1 << elt))));
44846 return;
44848 else
44849 break;
44850 case V16SFmode:
44851 if (TARGET_AVX512F)
44853 tmp = gen_reg_rtx (mode);
44854 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44855 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44856 force_reg (HImode, GEN_INT (1 << elt))));
44857 return;
44859 else
44860 break;
44861 case V16SImode:
44862 if (TARGET_AVX512F)
44864 tmp = gen_reg_rtx (mode);
44865 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44866 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44867 force_reg (HImode, GEN_INT (1 << elt))));
44868 return;
44870 else
44871 break;
44872 case V32HImode:
44873 if (TARGET_AVX512F && TARGET_AVX512BW)
44875 tmp = gen_reg_rtx (mode);
44876 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44877 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44878 force_reg (SImode, GEN_INT (1 << elt))));
44879 return;
44881 else
44882 break;
44883 case V64QImode:
44884 if (TARGET_AVX512F && TARGET_AVX512BW)
44886 tmp = gen_reg_rtx (mode);
44887 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
44888 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44889 force_reg (DImode, GEN_INT (1 << elt))));
44890 return;
44892 else
44893 break;
44895 default:
44896 break;
44899 if (use_vec_merge)
44901 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44902 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44903 emit_insn (gen_rtx_SET (target, tmp));
44905 else
44907 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44909 emit_move_insn (mem, target);
44911 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44912 emit_move_insn (tmp, val);
44914 emit_move_insn (target, mem);
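/* When none of the cases above provides a direct insert pattern, the
   final fallback spills the whole vector to a stack temporary, stores
   VAL into the slot at byte offset elt * GET_MODE_SIZE (inner_mode),
   and reloads the vector.  This is correct for every mode but typically
   suffers a store-forwarding stall, which is why the pattern-based
   paths above are preferred whenever the ISA offers them.  */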
44918 void
44919 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
44921 machine_mode mode = GET_MODE (vec);
44922 machine_mode inner_mode = GET_MODE_INNER (mode);
44923 bool use_vec_extr = false;
44924 rtx tmp;
44926 switch (mode)
44928 case V2SImode:
44929 case V2SFmode:
44930 if (!mmx_ok)
44931 break;
44932 /* FALLTHRU */
44934 case V2DFmode:
44935 case V2DImode:
44936 use_vec_extr = true;
44937 break;
44939 case V4SFmode:
44940 use_vec_extr = TARGET_SSE4_1;
44941 if (use_vec_extr)
44942 break;
44944 switch (elt)
44946 case 0:
44947 tmp = vec;
44948 break;
44950 case 1:
44951 case 3:
44952 tmp = gen_reg_rtx (mode);
44953 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
44954 GEN_INT (elt), GEN_INT (elt),
44955 GEN_INT (elt+4), GEN_INT (elt+4)));
44956 break;
44958 case 2:
44959 tmp = gen_reg_rtx (mode);
44960 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
44961 break;
44963 default:
44964 gcc_unreachable ();
44966 vec = tmp;
44967 use_vec_extr = true;
44968 elt = 0;
44969 break;
44971 case V4SImode:
44972 use_vec_extr = TARGET_SSE4_1;
44973 if (use_vec_extr)
44974 break;
44976 if (TARGET_SSE2)
44978 switch (elt)
44980 case 0:
44981 tmp = vec;
44982 break;
44984 case 1:
44985 case 3:
44986 tmp = gen_reg_rtx (mode);
44987 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
44988 GEN_INT (elt), GEN_INT (elt),
44989 GEN_INT (elt), GEN_INT (elt)));
44990 break;
44992 case 2:
44993 tmp = gen_reg_rtx (mode);
44994 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
44995 break;
44997 default:
44998 gcc_unreachable ();
45000 vec = tmp;
45001 use_vec_extr = true;
45002 elt = 0;
45004 else
45006 /* For SSE1, we have to reuse the V4SF code. */
45007 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45008 gen_lowpart (V4SFmode, vec), elt);
45009 return;
45011 break;
45013 case V8HImode:
45014 use_vec_extr = TARGET_SSE2;
45015 break;
45016 case V4HImode:
45017 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45018 break;
45020 case V16QImode:
45021 use_vec_extr = TARGET_SSE4_1;
45022 break;
45024 case V8SFmode:
45025 if (TARGET_AVX)
45027 tmp = gen_reg_rtx (V4SFmode);
45028 if (elt < 4)
45029 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45030 else
45031 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45032 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45033 return;
45035 break;
45037 case V4DFmode:
45038 if (TARGET_AVX)
45040 tmp = gen_reg_rtx (V2DFmode);
45041 if (elt < 2)
45042 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45043 else
45044 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45045 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45046 return;
45048 break;
45050 case V32QImode:
45051 if (TARGET_AVX)
45053 tmp = gen_reg_rtx (V16QImode);
45054 if (elt < 16)
45055 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45056 else
45057 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45058 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45059 return;
45061 break;
45063 case V16HImode:
45064 if (TARGET_AVX)
45066 tmp = gen_reg_rtx (V8HImode);
45067 if (elt < 8)
45068 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45069 else
45070 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45071 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45072 return;
45074 break;
45076 case V8SImode:
45077 if (TARGET_AVX)
45079 tmp = gen_reg_rtx (V4SImode);
45080 if (elt < 4)
45081 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45082 else
45083 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45084 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45085 return;
45087 break;
45089 case V4DImode:
45090 if (TARGET_AVX)
45092 tmp = gen_reg_rtx (V2DImode);
45093 if (elt < 2)
45094 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45095 else
45096 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45097 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45098 return;
45100 break;
45102 case V32HImode:
45103 if (TARGET_AVX512BW)
45105 tmp = gen_reg_rtx (V16HImode);
45106 if (elt < 16)
45107 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45108 else
45109 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45110 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45111 return;
45113 break;
45115 case V64QImode:
45116 if (TARGET_AVX512BW)
45118 tmp = gen_reg_rtx (V32QImode);
45119 if (elt < 32)
45120 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45121 else
45122 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45123 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45124 return;
45126 break;
45128 case V16SFmode:
45129 tmp = gen_reg_rtx (V8SFmode);
45130 if (elt < 8)
45131 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45132 else
45133 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45134 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45135 return;
45137 case V8DFmode:
45138 tmp = gen_reg_rtx (V4DFmode);
45139 if (elt < 4)
45140 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45141 else
45142 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45143 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45144 return;
45146 case V16SImode:
45147 tmp = gen_reg_rtx (V8SImode);
45148 if (elt < 8)
45149 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45150 else
45151 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45152 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45153 return;
45155 case V8DImode:
45156 tmp = gen_reg_rtx (V4DImode);
45157 if (elt < 4)
45158 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45159 else
45160 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45161 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45162 return;
45164 case V8QImode:
45165 /* ??? Could extract the appropriate HImode element and shift. */
45166 default:
45167 break;
45170 if (use_vec_extr)
45172 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45173 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45175 /* Let the rtl optimizers know about the zero extension performed. */
45176 if (inner_mode == QImode || inner_mode == HImode)
45178 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45179 target = gen_lowpart (SImode, target);
45182 emit_insn (gen_rtx_SET (target, tmp));
45184 else
45186 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45188 emit_move_insn (mem, vec);
45190 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45191 emit_move_insn (target, tmp);
45195 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45196 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45197 The upper bits of DEST are undefined, though they shouldn't cause
45198 exceptions (some bits from src or all zeros are ok). */
45200 static void
45201 emit_reduc_half (rtx dest, rtx src, int i)
45203 rtx tem, d = dest;
45204 switch (GET_MODE (src))
45206 case V4SFmode:
45207 if (i == 128)
45208 tem = gen_sse_movhlps (dest, src, src);
45209 else
45210 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45211 GEN_INT (1 + 4), GEN_INT (1 + 4));
45212 break;
45213 case V2DFmode:
45214 tem = gen_vec_interleave_highv2df (dest, src, src);
45215 break;
45216 case V16QImode:
45217 case V8HImode:
45218 case V4SImode:
45219 case V2DImode:
45220 d = gen_reg_rtx (V1TImode);
45221 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45222 GEN_INT (i / 2));
45223 break;
45224 case V8SFmode:
45225 if (i == 256)
45226 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45227 else
45228 tem = gen_avx_shufps256 (dest, src, src,
45229 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45230 break;
45231 case V4DFmode:
45232 if (i == 256)
45233 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45234 else
45235 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45236 break;
45237 case V32QImode:
45238 case V16HImode:
45239 case V8SImode:
45240 case V4DImode:
45241 if (i == 256)
45243 if (GET_MODE (dest) != V4DImode)
45244 d = gen_reg_rtx (V4DImode);
45245 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45246 gen_lowpart (V4DImode, src),
45247 const1_rtx);
45249 else
45251 d = gen_reg_rtx (V2TImode);
45252 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45253 GEN_INT (i / 2));
45255 break;
45256 case V64QImode:
45257 case V32HImode:
45258 case V16SImode:
45259 case V16SFmode:
45260 case V8DImode:
45261 case V8DFmode:
45262 if (i > 128)
45263 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45264 gen_lowpart (V16SImode, src),
45265 gen_lowpart (V16SImode, src),
45266 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45267 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45268 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45269 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45270 GEN_INT (0xC), GEN_INT (0xD),
45271 GEN_INT (0xE), GEN_INT (0xF),
45272 GEN_INT (0x10), GEN_INT (0x11),
45273 GEN_INT (0x12), GEN_INT (0x13),
45274 GEN_INT (0x14), GEN_INT (0x15),
45275 GEN_INT (0x16), GEN_INT (0x17));
45276 else
45277 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45278 gen_lowpart (V16SImode, src),
45279 GEN_INT (i == 128 ? 0x2 : 0x1),
45280 GEN_INT (0x3),
45281 GEN_INT (0x3),
45282 GEN_INT (0x3),
45283 GEN_INT (i == 128 ? 0x6 : 0x5),
45284 GEN_INT (0x7),
45285 GEN_INT (0x7),
45286 GEN_INT (0x7),
45287 GEN_INT (i == 128 ? 0xA : 0x9),
45288 GEN_INT (0xB),
45289 GEN_INT (0xB),
45290 GEN_INT (0xB),
45291 GEN_INT (i == 128 ? 0xE : 0xD),
45292 GEN_INT (0xF),
45293 GEN_INT (0xF),
45294 GEN_INT (0xF));
45295 break;
45296 default:
45297 gcc_unreachable ();
45299 emit_insn (tem);
45300 if (d != dest)
45301 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45304 /* Expand a vector reduction. FN is the binary pattern to reduce;
45305 DEST is the destination; IN is the input vector. */
45307 void
45308 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45310 rtx half, dst, vec = in;
45311 machine_mode mode = GET_MODE (in);
45312 int i;
45314 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45315 if (TARGET_SSE4_1
45316 && mode == V8HImode
45317 && fn == gen_uminv8hi3)
45319 emit_insn (gen_sse4_1_phminposuw (dest, in));
45320 return;
45323 for (i = GET_MODE_BITSIZE (mode);
45324 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45325 i >>= 1)
45327 half = gen_reg_rtx (mode);
45328 emit_reduc_half (half, vec, i);
45329 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45330 dst = dest;
45331 else
45332 dst = gen_reg_rtx (mode);
45333 emit_insn (fn (dst, half, vec));
45334 vec = dst;
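/* As an example of the loop above, reducing a V4SImode vector IN with
   FN == gen_sminv4si3 takes two halving steps, roughly

	emit_reduc_half (t1, in, 128);	  upper 64 bits -> lower half
	d1 = smin (t1, in);
	emit_reduc_half (t2, d1, 64);	  bits 32..63 -> bits 0..31
	dest = smin (t2, d1);

   (t1, t2 and d1 being fresh temporaries), after which element 0 of
   DEST holds the minimum of all four input elements and the remaining
   elements hold partial results.  */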
45338 /* Target hook for scalar_mode_supported_p. */
45339 static bool
45340 ix86_scalar_mode_supported_p (machine_mode mode)
45342 if (DECIMAL_FLOAT_MODE_P (mode))
45343 return default_decimal_float_supported_p ();
45344 else if (mode == TFmode)
45345 return true;
45346 else
45347 return default_scalar_mode_supported_p (mode);
45350 /* Implements target hook vector_mode_supported_p. */
45351 static bool
45352 ix86_vector_mode_supported_p (machine_mode mode)
45354 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45355 return true;
45356 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45357 return true;
45358 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45359 return true;
45360 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45361 return true;
45362 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45363 return true;
45364 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45365 return true;
45366 return false;
45369 /* Implement target hook libgcc_floating_mode_supported_p. */
45370 static bool
45371 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45373 switch (mode)
45375 case SFmode:
45376 case DFmode:
45377 case XFmode:
45378 return true;
45380 case TFmode:
45381 #ifdef IX86_NO_LIBGCC_TFMODE
45382 return false;
45383 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45384 return TARGET_LONG_DOUBLE_128;
45385 #else
45386 return true;
45387 #endif
45389 default:
45390 return false;
45394 /* Target hook for c_mode_for_suffix. */
45395 static machine_mode
45396 ix86_c_mode_for_suffix (char suffix)
45398 if (suffix == 'q')
45399 return TFmode;
45400 if (suffix == 'w')
45401 return XFmode;
45403 return VOIDmode;
45406 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45408 We do this in the new i386 backend to maintain source compatibility
45409 with the old cc0-based compiler. */
45411 static tree
45412 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45414 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45415 clobbers);
45416 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45417 clobbers);
45418 return clobbers;
45421 /* Implements target vector targetm.asm.encode_section_info. */
45423 static void ATTRIBUTE_UNUSED
45424 ix86_encode_section_info (tree decl, rtx rtl, int first)
45426 default_encode_section_info (decl, rtl, first);
45428 if (ix86_in_large_data_p (decl))
45429 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45432 /* Worker function for REVERSE_CONDITION. */
45434 enum rtx_code
45435 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45437 return (mode != CCFPmode && mode != CCFPUmode
45438 ? reverse_condition (code)
45439 : reverse_condition_maybe_unordered (code));
45442 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45443 to OPERANDS[0]. */
45445 const char *
45446 output_387_reg_move (rtx insn, rtx *operands)
45448 if (REG_P (operands[0]))
45450 if (REG_P (operands[1])
45451 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45453 if (REGNO (operands[0]) == FIRST_STACK_REG)
45454 return output_387_ffreep (operands, 0);
45455 return "fstp\t%y0";
45457 if (STACK_TOP_P (operands[0]))
45458 return "fld%Z1\t%y1";
45459 return "fst\t%y0";
45461 else if (MEM_P (operands[0]))
45463 gcc_assert (REG_P (operands[1]));
45464 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45465 return "fstp%Z0\t%y0";
45466 else
45468 /* There is no non-popping store to memory for XFmode.
45469 So if we need one, follow the store with a load. */
45470 if (GET_MODE (operands[0]) == XFmode)
45471 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45472 else
45473 return "fst%Z0\t%y0";
45476 else
45477 gcc_unreachable();
45480 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45481 FP status register is set. */
45483 void
45484 ix86_emit_fp_unordered_jump (rtx label)
45486 rtx reg = gen_reg_rtx (HImode);
45487 rtx temp;
45489 emit_insn (gen_x86_fnstsw_1 (reg));
45491 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45493 emit_insn (gen_x86_sahf_1 (reg));
45495 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45496 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45498 else
45500 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45502 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45503 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45506 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45507 gen_rtx_LABEL_REF (VOIDmode, label),
45508 pc_rtx);
45509 temp = gen_rtx_SET (pc_rtx, temp);
45511 emit_jump_insn (temp);
45512 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45515 /* Output code to perform a log1p XFmode calculation. */
45517 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45519 rtx_code_label *label1 = gen_label_rtx ();
45520 rtx_code_label *label2 = gen_label_rtx ();
45522 rtx tmp = gen_reg_rtx (XFmode);
45523 rtx tmp2 = gen_reg_rtx (XFmode);
45524 rtx test;
45526 emit_insn (gen_absxf2 (tmp, op1));
45527 test = gen_rtx_GE (VOIDmode, tmp,
45528 CONST_DOUBLE_FROM_REAL_VALUE (
45529 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45530 XFmode));
45531 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45533 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45534 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45535 emit_jump (label2);
45537 emit_label (label1);
45538 emit_move_insn (tmp, CONST1_RTX (XFmode));
45539 emit_insn (gen_addxf3 (tmp, op1, tmp));
45540 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45541 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45543 emit_label (label2);
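/* The 0.2928... threshold above is (approximately) 1 - sqrt(2)/2.
   fyl2xp1 is only specified for arguments of small magnitude, roughly
   up to that bound, so for |x| below it the sequence computes
   fldln2 * log2 (x + 1) directly with fyl2xp1, while for larger |x| it
   forms 1 + x explicitly and uses the ordinary fyl2x; either way the
   result is ln (1 + x).  */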
45546 /* Emit code for round calculation. */
45547 void ix86_emit_i387_round (rtx op0, rtx op1)
45549 machine_mode inmode = GET_MODE (op1);
45550 machine_mode outmode = GET_MODE (op0);
45551 rtx e1, e2, res, tmp, tmp1, half;
45552 rtx scratch = gen_reg_rtx (HImode);
45553 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45554 rtx_code_label *jump_label = gen_label_rtx ();
45555 rtx insn;
45556 rtx (*gen_abs) (rtx, rtx);
45557 rtx (*gen_neg) (rtx, rtx);
45559 switch (inmode)
45561 case SFmode:
45562 gen_abs = gen_abssf2;
45563 break;
45564 case DFmode:
45565 gen_abs = gen_absdf2;
45566 break;
45567 case XFmode:
45568 gen_abs = gen_absxf2;
45569 break;
45570 default:
45571 gcc_unreachable ();
45574 switch (outmode)
45576 case SFmode:
45577 gen_neg = gen_negsf2;
45578 break;
45579 case DFmode:
45580 gen_neg = gen_negdf2;
45581 break;
45582 case XFmode:
45583 gen_neg = gen_negxf2;
45584 break;
45585 case HImode:
45586 gen_neg = gen_neghi2;
45587 break;
45588 case SImode:
45589 gen_neg = gen_negsi2;
45590 break;
45591 case DImode:
45592 gen_neg = gen_negdi2;
45593 break;
45594 default:
45595 gcc_unreachable ();
45598 e1 = gen_reg_rtx (inmode);
45599 e2 = gen_reg_rtx (inmode);
45600 res = gen_reg_rtx (outmode);
45602 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45604 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45606 /* scratch = fxam(op1) */
45607 emit_insn (gen_rtx_SET (scratch,
45608 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45609 UNSPEC_FXAM)));
45610 /* e1 = fabs(op1) */
45611 emit_insn (gen_abs (e1, op1));
45613 /* e2 = e1 + 0.5 */
45614 half = force_reg (inmode, half);
45615 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
45617 /* res = floor(e2) */
45618 if (inmode != XFmode)
45620 tmp1 = gen_reg_rtx (XFmode);
45622 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45624 else
45625 tmp1 = e2;
45627 switch (outmode)
45629 case SFmode:
45630 case DFmode:
45632 rtx tmp0 = gen_reg_rtx (XFmode);
45634 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45636 emit_insn (gen_rtx_SET (res,
45637 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45638 UNSPEC_TRUNC_NOOP)));
45640 break;
45641 case XFmode:
45642 emit_insn (gen_frndintxf2_floor (res, tmp1));
45643 break;
45644 case HImode:
45645 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45646 break;
45647 case SImode:
45648 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45649 break;
45650 case DImode:
45651 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45652 break;
45653 default:
45654 gcc_unreachable ();
45657 /* flags = signbit(a) */
45658 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45660 /* if (flags) then res = -res */
45661 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45662 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45663 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45664 pc_rtx);
45665 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45666 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45667 JUMP_LABEL (insn) = jump_label;
45669 emit_insn (gen_neg (res, res));
45671 emit_label (jump_label);
45672 LABEL_NUSES (jump_label) = 1;
45674 emit_move_insn (op0, res);
45677 /* Output code to perform a Newton-Raphson approximation of a single precision
45678 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45680 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45682 rtx x0, x1, e0, e1;
45684 x0 = gen_reg_rtx (mode);
45685 e0 = gen_reg_rtx (mode);
45686 e1 = gen_reg_rtx (mode);
45687 x1 = gen_reg_rtx (mode);
45689 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))) */
45691 b = force_reg (mode, b);
45693 /* x0 = rcp(b) estimate */
45694 if (mode == V16SFmode || mode == V8DFmode)
45695 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45696 UNSPEC_RCP14)));
45697 else
45698 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45699 UNSPEC_RCP)));
45701 /* e0 = x0 * b */
45702 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
45704 /* e0 = x0 * e0 */
45705 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
45707 /* e1 = x0 + x0 */
45708 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
45710 /* x1 = e1 - e0 */
45711 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
45713 /* res = a * x1 */
45714 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
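/* The sequence above is one Newton-Raphson refinement of the hardware
   reciprocal estimate: with x0 ~= 1/b,

	x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0)

   which is exactly e1 - e0, and the quotient is then formed as a * x1.
   One step roughly doubles the number of correct bits in the rcpps /
   rcp14 estimate, which is the accuracy this expansion is expected to
   deliver under the reciprocal (-mrecip style) optimizations.  */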
45717 /* Output code to perform a Newton-Raphson approximation of a
45718 single precision floating point [reciprocal] square root. */
45720 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45721 bool recip)
45723 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45724 REAL_VALUE_TYPE r;
45725 int unspec;
45727 x0 = gen_reg_rtx (mode);
45728 e0 = gen_reg_rtx (mode);
45729 e1 = gen_reg_rtx (mode);
45730 e2 = gen_reg_rtx (mode);
45731 e3 = gen_reg_rtx (mode);
45733 real_from_integer (&r, VOIDmode, -3, SIGNED);
45734 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45736 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45737 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45738 unspec = UNSPEC_RSQRT;
45740 if (VECTOR_MODE_P (mode))
45742 mthree = ix86_build_const_vector (mode, true, mthree);
45743 mhalf = ix86_build_const_vector (mode, true, mhalf);
45744 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45745 if (GET_MODE_SIZE (mode) == 64)
45746 unspec = UNSPEC_RSQRT14;
45749 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45750 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45752 a = force_reg (mode, a);
45754 /* x0 = rsqrt(a) estimate */
45755 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45756 unspec)));
45758 /* If a == 0.0, filter out the infinite rsqrt result so that sqrt (0.0) does not become NaN. */
45759 if (!recip)
45761 rtx zero, mask;
45763 zero = gen_reg_rtx (mode);
45764 mask = gen_reg_rtx (mode);
45766 zero = force_reg (mode, CONST0_RTX(mode));
45768 /* Handle masked compare. */
45769 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45771 mask = gen_reg_rtx (HImode);
45772 /* Imm value 0x4 corresponds to not-equal comparison. */
45773 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45774 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45776 else
45778 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
45780 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
45784 /* e0 = x0 * a */
45785 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
45786 /* e1 = e0 * x0 */
45787 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
45789 /* e2 = e1 - 3. */
45790 mthree = force_reg (mode, mthree);
45791 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
45793 mhalf = force_reg (mode, mhalf);
45794 if (recip)
45795 /* e3 = -.5 * x0 */
45796 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
45797 else
45798 /* e3 = -.5 * e0 */
45799 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
45800 /* ret = e2 * e3 */
45801 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
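/* The arithmetic above is the Newton-Raphson step for the reciprocal
   square root, written with negated constants: with x0 ~= 1/sqrt(a),

	rsqrt:  x1     = 0.5 * x0 * (3 - a*x0*x0) = -0.5 * x0       * (a*x0*x0 - 3)
	sqrt:   a * x1                            = -0.5 * (a * x0) * (a*x0*x0 - 3)

   so e0 = a*x0, e1 = a*x0*x0, e2 = a*x0*x0 - 3, and e3 supplies the
   -0.5 * x0 (recip) or -0.5 * e0 (sqrt) factor, matching the comment
   before the expansion.  */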
45804 #ifdef TARGET_SOLARIS
45805 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45807 static void
45808 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45809 tree decl)
45811 /* With Binutils 2.15, the "@unwind" marker must be specified on
45812 every occurrence of the ".eh_frame" section, not just the first
45813 one. */
45814 if (TARGET_64BIT
45815 && strcmp (name, ".eh_frame") == 0)
45817 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45818 flags & SECTION_WRITE ? "aw" : "a");
45819 return;
45822 #ifndef USE_GAS
45823 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45825 solaris_elf_asm_comdat_section (name, flags, decl);
45826 return;
45828 #endif
45830 default_elf_asm_named_section (name, flags, decl);
45832 #endif /* TARGET_SOLARIS */
45834 /* Return the mangling of TYPE if it is an extended fundamental type. */
45836 static const char *
45837 ix86_mangle_type (const_tree type)
45839 type = TYPE_MAIN_VARIANT (type);
45841 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45842 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45843 return NULL;
45845 switch (TYPE_MODE (type))
45847 case TFmode:
45848 /* __float128 is "g". */
45849 return "g";
45850 case XFmode:
45851 /* "long double" or __float80 is "e". */
45852 return "e";
45853 default:
45854 return NULL;
45858 /* For 32-bit code we can save PIC register setup by using the
45859 __stack_chk_fail_local hidden function instead of calling
45860 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
45861 register, so it is better to call __stack_chk_fail directly. */
45863 static tree ATTRIBUTE_UNUSED
45864 ix86_stack_protect_fail (void)
45866 return TARGET_64BIT
45867 ? default_external_stack_protect_fail ()
45868 : default_hidden_stack_protect_fail ();
45871 /* Select a format to encode pointers in exception handling data. CODE
45872 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45873 true if the symbol may be affected by dynamic relocations.
45875 ??? All x86 object file formats are capable of representing this.
45876 After all, the relocation needed is the same as for the call insn.
45877 Whether or not a particular assembler allows us to enter such, I
45878 guess we'll have to see. */
45880 asm_preferred_eh_data_format (int code, int global)
45882 if (flag_pic)
45884 int type = DW_EH_PE_sdata8;
45885 if (!TARGET_64BIT
45886 || ix86_cmodel == CM_SMALL_PIC
45887 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45888 type = DW_EH_PE_sdata4;
45889 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
45891 if (ix86_cmodel == CM_SMALL
45892 || (ix86_cmodel == CM_MEDIUM && code))
45893 return DW_EH_PE_udata4;
45894 return DW_EH_PE_absptr;
45897 /* Expand copysign from SIGN to the positive value ABS_VALUE
45898 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
45899 the sign-bit. */
45900 static void
45901 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
45903 machine_mode mode = GET_MODE (sign);
45904 rtx sgn = gen_reg_rtx (mode);
45905 if (mask == NULL_RTX)
45907 machine_mode vmode;
45909 if (mode == SFmode)
45910 vmode = V4SFmode;
45911 else if (mode == DFmode)
45912 vmode = V2DFmode;
45913 else
45914 vmode = mode;
45916 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
45917 if (!VECTOR_MODE_P (mode))
45919 /* We need to generate a scalar mode mask in this case. */
45920 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45921 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45922 mask = gen_reg_rtx (mode);
45923 emit_insn (gen_rtx_SET (mask, tmp));
45926 else
45927 mask = gen_rtx_NOT (mode, mask);
45928 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
45929 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
45932 /* Expand fabs (OP0) and return a new rtx that holds the result. The
45933 mask for masking out the sign-bit is stored in *SMASK, if that is
45934 non-null. */
45935 static rtx
45936 ix86_expand_sse_fabs (rtx op0, rtx *smask)
45938 machine_mode vmode, mode = GET_MODE (op0);
45939 rtx xa, mask;
45941 xa = gen_reg_rtx (mode);
45942 if (mode == SFmode)
45943 vmode = V4SFmode;
45944 else if (mode == DFmode)
45945 vmode = V2DFmode;
45946 else
45947 vmode = mode;
45948 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
45949 if (!VECTOR_MODE_P (mode))
45951 /* We need to generate a scalar mode mask in this case. */
45952 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
45953 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
45954 mask = gen_reg_rtx (mode);
45955 emit_insn (gen_rtx_SET (mask, tmp));
45957 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
45959 if (smask)
45960 *smask = mask;
45962 return xa;
45965 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
45966 swapping the operands if SWAP_OPERANDS is true. The expanded
45967 code is a forward jump to a newly created label in case the
45968 comparison is true. The generated label rtx is returned. */
45969 static rtx_code_label *
45970 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
45971 bool swap_operands)
45973 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
45974 rtx_code_label *label;
45975 rtx tmp;
45977 if (swap_operands)
45978 std::swap (op0, op1);
45980 label = gen_label_rtx ();
45981 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
45982 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
45983 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
45984 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
45985 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
45986 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
45987 JUMP_LABEL (tmp) = label;
45989 return label;
45992 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
45993 using comparison code CODE. Operands are swapped for the comparison if
45994 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
45995 static rtx
45996 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
45997 bool swap_operands)
45999 rtx (*insn)(rtx, rtx, rtx, rtx);
46000 machine_mode mode = GET_MODE (op0);
46001 rtx mask = gen_reg_rtx (mode);
46003 if (swap_operands)
46004 std::swap (op0, op1);
46006 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46008 emit_insn (insn (mask, op0, op1,
46009 gen_rtx_fmt_ee (code, mode, op0, op1)));
46010 return mask;
46013 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46014 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46015 static rtx
46016 ix86_gen_TWO52 (machine_mode mode)
46018 REAL_VALUE_TYPE TWO52r;
46019 rtx TWO52;
46021 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46022 TWO52 = const_double_from_real_value (TWO52r, mode);
46023 TWO52 = force_reg (mode, TWO52);
46025 return TWO52;
46028 /* Expand SSE sequence for computing lround from OP1 storing
46029 into OP0. */
46030 void
46031 ix86_expand_lround (rtx op0, rtx op1)
46033 /* C code for the stuff we're doing below:
46034 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46035 return (long)tmp;
46037 machine_mode mode = GET_MODE (op1);
46038 const struct real_format *fmt;
46039 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46040 rtx adj;
46042 /* load nextafter (0.5, 0.0) */
46043 fmt = REAL_MODE_FORMAT (mode);
46044 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46045 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46047 /* adj = copysign (0.5, op1) */
46048 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46049 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46051 /* adj = op1 + adj */
46052 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46054 /* op0 = (imode)adj */
46055 expand_fix (op0, adj, 0);
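/* The constant is nextafter (0.5, 0.0) rather than 0.5 itself to avoid
   mis-rounding inputs just below one half: for the largest double
   smaller than 0.5, adding exactly 0.5 would round up to 1.0 under
   round-to-nearest-even, and the truncation would yield 1 instead of 0.
   Adding the predecessor of 0.5 keeps such inputs below the integer
   boundary while still pushing true halfway cases (0.5, 1.5, ...) past
   it, so the final truncating fix rounds them away from zero as lround
   requires.  */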
46058 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
46059 storing into OPERAND0. */
46060 void
46061 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46063 /* C code for the stuff we're doing below (for do_floor):
46064 xi = (long)op1;
46065 xi -= (double)xi > op1 ? 1 : 0;
46066 return xi;
46068 machine_mode fmode = GET_MODE (op1);
46069 machine_mode imode = GET_MODE (op0);
46070 rtx ireg, freg, tmp;
46071 rtx_code_label *label;
46073 /* reg = (long)op1 */
46074 ireg = gen_reg_rtx (imode);
46075 expand_fix (ireg, op1, 0);
46077 /* freg = (double)reg */
46078 freg = gen_reg_rtx (fmode);
46079 expand_float (freg, ireg, 0);
46081 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46082 label = ix86_expand_sse_compare_and_jump (UNLE,
46083 freg, op1, !do_floor);
46084 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46085 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46086 emit_move_insn (ireg, tmp);
46088 emit_label (label);
46089 LABEL_NUSES (label) = 1;
46091 emit_move_insn (op0, ireg);
46094 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46095 result in OPERAND0. */
46096 void
46097 ix86_expand_rint (rtx operand0, rtx operand1)
46099 /* C code for the stuff we're doing below:
46100 xa = fabs (operand1);
46101 if (!isless (xa, 2**52))
46102 return operand1;
46103 xa = xa + 2**52 - 2**52;
46104 return copysign (xa, operand1);
46106 machine_mode mode = GET_MODE (operand0);
46107 rtx res, xa, TWO52, mask;
46108 rtx_code_label *label;
46110 res = gen_reg_rtx (mode);
46111 emit_move_insn (res, operand1);
46113 /* xa = abs (operand1) */
46114 xa = ix86_expand_sse_fabs (res, &mask);
46116 /* if (!isless (xa, TWO52)) goto label; */
46117 TWO52 = ix86_gen_TWO52 (mode);
46118 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46120 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46121 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46123 ix86_sse_copysign_to_positive (res, xa, res, mask);
46125 emit_label (label);
46126 LABEL_NUSES (label) = 1;
46128 emit_move_insn (operand0, res);
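/* The xa + 2**52 - 2**52 idiom above (2**23 for SFmode, see
   ix86_gen_TWO52) is the usual power-of-two rounding trick: once
   |xa| < 2**52, adding 2**52 produces a value whose significand has no
   bits below the units place, so the addition itself rounds xa to an
   integer in the current rounding mode, and the subsequent subtraction
   is exact.  The isless (xa, TWO52) guard is what makes this safe:
   values at or above 2**52 are already integral (and NaNs take the same
   early exit), so they are returned unchanged.  */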
46131 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46132 into OPERAND0. */
46133 void
46134 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46136 /* C code for the stuff we expand below.
46137 double xa = fabs (x), x2;
46138 if (!isless (xa, TWO52))
46139 return x;
46140 xa = xa + TWO52 - TWO52;
46141 x2 = copysign (xa, x);
46142 Compensate. Floor:
46143 if (x2 > x)
46144 x2 -= 1;
46145 Compensate. Ceil:
46146 if (x2 < x)
46147 x2 -= -1;
46148 return x2;
46150 machine_mode mode = GET_MODE (operand0);
46151 rtx xa, TWO52, tmp, one, res, mask;
46152 rtx_code_label *label;
46154 TWO52 = ix86_gen_TWO52 (mode);
46156 /* Temporary for holding the result, initialized to the input
46157 operand to ease control flow. */
46158 res = gen_reg_rtx (mode);
46159 emit_move_insn (res, operand1);
46161 /* xa = abs (operand1) */
46162 xa = ix86_expand_sse_fabs (res, &mask);
46164 /* if (!isless (xa, TWO52)) goto label; */
46165 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46167 /* xa = xa + TWO52 - TWO52; */
46168 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46169 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46171 /* xa = copysign (xa, operand1) */
46172 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46174 /* generate 1.0 or -1.0 */
46175 one = force_reg (mode,
46176 const_double_from_real_value (do_floor
46177 ? dconst1 : dconstm1, mode));
46179 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46180 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46181 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46182 /* We always need to subtract here to preserve signed zero. */
46183 tmp = expand_simple_binop (mode, MINUS,
46184 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46185 emit_move_insn (res, tmp);
46187 emit_label (label);
46188 LABEL_NUSES (label) = 1;
46190 emit_move_insn (operand0, res);
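/* The "Compensate" step above is branchless: the SSE comparison
   produces an all-ones or all-zeros mask, so ANDing it with the
   constant 1.0 (or -1.0 for the ceil direction) yields exactly the
   amount that must be subtracted, replacing the conditional in the C
   sketch with a compare, an AND and a subtraction.  Always subtracting,
   rather than conditionally adding, is what preserves the sign of a
   signed zero, as noted in the code.  */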
46193 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46194 into OPERAND0. */
46195 void
46196 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46198 /* C code for the stuff we expand below.
46199 double xa = fabs (x), x2;
46200 if (!isless (xa, TWO52))
46201 return x;
46202 x2 = (double)(long)x;
46203 Compensate. Floor:
46204 if (x2 > x)
46205 x2 -= 1;
46206 Compensate. Ceil:
46207 if (x2 < x)
46208 x2 += 1;
46209 if (HONOR_SIGNED_ZEROS (mode))
46210 return copysign (x2, x);
46211 return x2;
46213 machine_mode mode = GET_MODE (operand0);
46214 rtx xa, xi, TWO52, tmp, one, res, mask;
46215 rtx_code_label *label;
46217 TWO52 = ix86_gen_TWO52 (mode);
46219 /* Temporary for holding the result, initialized to the input
46220 operand to ease control flow. */
46221 res = gen_reg_rtx (mode);
46222 emit_move_insn (res, operand1);
46224 /* xa = abs (operand1) */
46225 xa = ix86_expand_sse_fabs (res, &mask);
46227 /* if (!isless (xa, TWO52)) goto label; */
46228 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46230 /* xa = (double)(long)x */
46231 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46232 expand_fix (xi, res, 0);
46233 expand_float (xa, xi, 0);
46235 /* generate 1.0 */
46236 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46238 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46239 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46240 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46241 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46242 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46243 emit_move_insn (res, tmp);
46245 if (HONOR_SIGNED_ZEROS (mode))
46246 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46248 emit_label (label);
46249 LABEL_NUSES (label) = 1;
46251 emit_move_insn (operand0, res);
46254 /* Expand SSE sequence for computing round from OPERAND1 storing
46255 into OPERAND0. Sequence that works without relying on DImode truncation
46256 via cvttsd2siq that is only available on 64bit targets. */
46257 void
46258 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46260 /* C code for the stuff we expand below.
46261 double xa = fabs (x), xa2, x2;
46262 if (!isless (xa, TWO52))
46263 return x;
46264 Using the absolute value and copying back sign makes
46265 -0.0 -> -0.0 correct.
46266 xa2 = xa + TWO52 - TWO52;
46267 Compensate.
46268 dxa = xa2 - xa;
46269 if (dxa <= -0.5)
46270 xa2 += 1;
46271 else if (dxa > 0.5)
46272 xa2 -= 1;
46273 x2 = copysign (xa2, x);
46274 return x2;
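/* For illustration (not in the original source): the TWO52 addition rounds
   ties to even, but round() must round halfway cases away from zero.  The
   dxa compensation fixes this up: e.g. xa = 2.5 gives xa2 = 2.0 and
   dxa = -0.5, so the "dxa <= -0.5" branch adds 1 and the result becomes
   3.0, while xa = 3.5 already rounds up to 4.0 and needs no adjustment.  */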
46276 machine_mode mode = GET_MODE (operand0);
46277 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46278 rtx_code_label *label;
46280 TWO52 = ix86_gen_TWO52 (mode);
46282 /* Temporary for holding the result, initialized to the input
46283 operand to ease control flow. */
46284 res = gen_reg_rtx (mode);
46285 emit_move_insn (res, operand1);
46287 /* xa = abs (operand1) */
46288 xa = ix86_expand_sse_fabs (res, &mask);
46290 /* if (!isless (xa, TWO52)) goto label; */
46291 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46293 /* xa2 = xa + TWO52 - TWO52; */
46294 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46295 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46297 /* dxa = xa2 - xa; */
46298 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46300 /* generate 0.5, 1.0 and -0.5 */
46301 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46302 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46303 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46304 0, OPTAB_DIRECT);
46306 /* Compensate. */
46307 tmp = gen_reg_rtx (mode);
46308 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46309 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46310 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46311 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46312 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46313 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46314 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46315 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46317 /* res = copysign (xa2, operand1) */
46318 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46320 emit_label (label);
46321 LABEL_NUSES (label) = 1;
46323 emit_move_insn (operand0, res);
46326 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46327 into OPERAND0. */
46328 void
46329 ix86_expand_trunc (rtx operand0, rtx operand1)
46331 /* C code for SSE variant we expand below.
46332 double xa = fabs (x), x2;
46333 if (!isless (xa, TWO52))
46334 return x;
46335 x2 = (double)(long)x;
46336 if (HONOR_SIGNED_ZEROS (mode))
46337 return copysign (x2, x);
46338 return x2;
46340 machine_mode mode = GET_MODE (operand0);
46341 rtx xa, xi, TWO52, res, mask;
46342 rtx_code_label *label;
46344 TWO52 = ix86_gen_TWO52 (mode);
46346 /* Temporary for holding the result, initialized to the input
46347 operand to ease control flow. */
46348 res = gen_reg_rtx (mode);
46349 emit_move_insn (res, operand1);
46351 /* xa = abs (operand1) */
46352 xa = ix86_expand_sse_fabs (res, &mask);
46354 /* if (!isless (xa, TWO52)) goto label; */
46355 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46357 /* x = (double)(long)x */
46358 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46359 expand_fix (xi, res, 0);
46360 expand_float (res, xi, 0);
46362 if (HONOR_SIGNED_ZEROS (mode))
46363 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46365 emit_label (label);
46366 LABEL_NUSES (label) = 1;
46368 emit_move_insn (operand0, res);
46371 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46372 into OPERAND0. */
46373 void
46374 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46376 machine_mode mode = GET_MODE (operand0);
46377 rtx xa, mask, TWO52, one, res, smask, tmp;
46378 rtx_code_label *label;
46380 /* C code for SSE variant we expand below.
46381 double xa = fabs (x), x2;
46382 if (!isless (xa, TWO52))
46383 return x;
46384 xa2 = xa + TWO52 - TWO52;
46385 Compensate:
46386 if (xa2 > xa)
46387 xa2 -= 1.0;
46388 x2 = copysign (xa2, x);
46389 return x2;
46392 TWO52 = ix86_gen_TWO52 (mode);
46394 /* Temporary for holding the result, initialized to the input
46395 operand to ease control flow. */
46396 res = gen_reg_rtx (mode);
46397 emit_move_insn (res, operand1);
46399 /* xa = abs (operand1) */
46400 xa = ix86_expand_sse_fabs (res, &smask);
46402 /* if (!isless (xa, TWO52)) goto label; */
46403 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46405 /* res = xa + TWO52 - TWO52; */
46406 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46407 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46408 emit_move_insn (res, tmp);
46410 /* generate 1.0 */
46411 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46413 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46414 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46415 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46416 tmp = expand_simple_binop (mode, MINUS,
46417 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46418 emit_move_insn (res, tmp);
46420 /* res = copysign (res, operand1) */
46421 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46423 emit_label (label);
46424 LABEL_NUSES (label) = 1;
46426 emit_move_insn (operand0, res);
46429 /* Expand SSE sequence for computing round from OPERAND1 storing
46430 into OPERAND0. */
46431 void
46432 ix86_expand_round (rtx operand0, rtx operand1)
46434 /* C code for the stuff we're doing below:
46435 double xa = fabs (x);
46436 if (!isless (xa, TWO52))
46437 return x;
46438 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46439 return copysign (xa, x);
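/* For illustration (not in the original source): adding a plain 0.5 before
   truncating would be wrong for inputs just below one half.  With
   x = nextafter (0.5, 0.0) the sum x + 0.5 rounds up to 1.0 in double
   precision and would truncate to 1, although round (x) == 0.  Adding
   nextafter (0.5, 0.0) instead keeps the sum below 1.0, so the truncation
   gives the expected result.  */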
46441 machine_mode mode = GET_MODE (operand0);
46442 rtx res, TWO52, xa, xi, half, mask;
46443 rtx_code_label *label;
46444 const struct real_format *fmt;
46445 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46447 /* Temporary for holding the result, initialized to the input
46448 operand to ease control flow. */
46449 res = gen_reg_rtx (mode);
46450 emit_move_insn (res, operand1);
46452 TWO52 = ix86_gen_TWO52 (mode);
46453 xa = ix86_expand_sse_fabs (res, &mask);
46454 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46456 /* load nextafter (0.5, 0.0) */
46457 fmt = REAL_MODE_FORMAT (mode);
46458 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46459 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46461 /* xa = xa + 0.5 */
46462 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46463 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46465 /* xa = (double)(int64_t)xa */
46466 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46467 expand_fix (xi, xa, 0);
46468 expand_float (xa, xi, 0);
46470 /* res = copysign (xa, operand1) */
46471 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46473 emit_label (label);
46474 LABEL_NUSES (label) = 1;
46476 emit_move_insn (operand0, res);
46479 /* Expand SSE sequence for computing round
46480 from OP1 storing into OP0 using sse4 round insn. */
46481 void
46482 ix86_expand_round_sse4 (rtx op0, rtx op1)
46484 machine_mode mode = GET_MODE (op0);
46485 rtx e1, e2, res, half;
46486 const struct real_format *fmt;
46487 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46488 rtx (*gen_copysign) (rtx, rtx, rtx);
46489 rtx (*gen_round) (rtx, rtx, rtx);
46491 switch (mode)
46493 case SFmode:
46494 gen_copysign = gen_copysignsf3;
46495 gen_round = gen_sse4_1_roundsf2;
46496 break;
46497 case DFmode:
46498 gen_copysign = gen_copysigndf3;
46499 gen_round = gen_sse4_1_rounddf2;
46500 break;
46501 default:
46502 gcc_unreachable ();
46505 /* round (a) = trunc (a + copysign (0.5, a)) */
46507 /* load nextafter (0.5, 0.0) */
46508 fmt = REAL_MODE_FORMAT (mode);
46509 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46510 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46511 half = const_double_from_real_value (pred_half, mode);
46513 /* e1 = copysign (0.5, op1) */
46514 e1 = gen_reg_rtx (mode);
46515 emit_insn (gen_copysign (e1, half, op1));
46517 /* e2 = op1 + e1 */
46518 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46520 /* res = trunc (e2) */
46521 res = gen_reg_rtx (mode);
46522 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46524 emit_move_insn (op0, res);
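/* For illustration (not in the original source): this uses the same
   identity as ix86_expand_round above,
   round (a) == trunc (a + copysign (nextafter (0.5, 0.0), a)),
   except that the final truncation is done by a single SSE4.1 round
   instruction with the ROUND_TRUNC rounding-control immediate instead of
   the fix/float conversion pair used there.  */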
46528 /* Table of valid machine attributes. */
46529 static const struct attribute_spec ix86_attribute_table[] =
46531 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46532 affects_type_identity } */
46533 /* Stdcall attribute says callee is responsible for popping arguments
46534 if they are not variable. */
46535 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46536 true },
46537 /* Fastcall attribute says callee is responsible for popping arguments
46538 if they are not variable. */
46539 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46540 true },
46541 /* Thiscall attribute says callee is responsible for popping arguments
46542 if they are not variable. */
46543 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46544 true },
46545 /* Cdecl attribute says the callee is a normal C declaration */
46546 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46547 true },
46548 /* Regparm attribute specifies how many integer arguments are to be
46549 passed in registers. */
46550 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46551 true },
46552 /* Sseregparm attribute says we are using x86_64 calling conventions
46553 for FP arguments. */
46554 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46555 true },
46556 /* The transactional memory builtins are implicitly regparm or fastcall
46557 depending on the ABI. Override the generic do-nothing attribute that
46558 these builtins were declared with. */
46559 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46560 true },
46561 /* force_align_arg_pointer says this function realigns the stack at entry. */
46562 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46563 false, true, true, ix86_handle_cconv_attribute, false },
46564 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46565 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46566 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46567 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46568 false },
46569 #endif
46570 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46571 false },
46572 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46573 false },
46574 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46575 SUBTARGET_ATTRIBUTE_TABLE,
46576 #endif
46577 /* ms_abi and sysv_abi calling convention function attributes. */
46578 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46579 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46580 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46581 false },
46582 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46583 ix86_handle_callee_pop_aggregate_return, true },
46584 /* End element. */
46585 { NULL, 0, 0, false, false, false, NULL, false }
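/* For illustration (not in the original source): these entries correspond
   to the x86 function attributes documented in the GCC manual, e.g.

     void __attribute__ ((fastcall)) f (int a, int b);
     void __attribute__ ((regparm (3))) g (int a, int b, int c);

   The min_len/max_len fields above give the number of attribute arguments,
   so "regparm" requires exactly one argument while "fastcall" takes none.  */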
46588 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46589 static int
46590 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46591 tree vectype, int)
46593 unsigned elements;
46595 switch (type_of_cost)
46597 case scalar_stmt:
46598 return ix86_cost->scalar_stmt_cost;
46600 case scalar_load:
46601 return ix86_cost->scalar_load_cost;
46603 case scalar_store:
46604 return ix86_cost->scalar_store_cost;
46606 case vector_stmt:
46607 return ix86_cost->vec_stmt_cost;
46609 case vector_load:
46610 return ix86_cost->vec_align_load_cost;
46612 case vector_store:
46613 return ix86_cost->vec_store_cost;
46615 case vec_to_scalar:
46616 return ix86_cost->vec_to_scalar_cost;
46618 case scalar_to_vec:
46619 return ix86_cost->scalar_to_vec_cost;
46621 case unaligned_load:
46622 case unaligned_store:
46623 return ix86_cost->vec_unalign_load_cost;
46625 case cond_branch_taken:
46626 return ix86_cost->cond_taken_branch_cost;
46628 case cond_branch_not_taken:
46629 return ix86_cost->cond_not_taken_branch_cost;
46631 case vec_perm:
46632 case vec_promote_demote:
46633 return ix86_cost->vec_stmt_cost;
46635 case vec_construct:
46636 elements = TYPE_VECTOR_SUBPARTS (vectype);
46637 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
46639 default:
46640 gcc_unreachable ();
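/* For illustration (not in the original source): for vec_construct the
   cost grows with the number of elements being gathered, e.g. building a
   V4SF from four scalars is charged (4 / 2 + 1) = 3 times vec_stmt_cost,
   and a V8SI from eight scalars (8 / 2 + 1) = 5 times vec_stmt_cost.  */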
46644 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46645 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46646 insn every time. */
46648 static GTY(()) rtx_insn *vselect_insn;
46650 /* Initialize vselect_insn. */
46652 static void
46653 init_vselect_insn (void)
46655 unsigned i;
46656 rtx x;
46658 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46659 for (i = 0; i < MAX_VECT_LEN; ++i)
46660 XVECEXP (x, 0, i) = const0_rtx;
46661 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46662 const0_rtx), x);
46663 x = gen_rtx_SET (const0_rtx, x);
46664 start_sequence ();
46665 vselect_insn = emit_insn (x);
46666 end_sequence ();
46669 /* Construct (set target (vec_select op0 (parallel perm))) and
46670 return true if that's a valid instruction in the active ISA. */
46672 static bool
46673 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46674 unsigned nelt, bool testing_p)
46676 unsigned int i;
46677 rtx x, save_vconcat;
46678 int icode;
46680 if (vselect_insn == NULL_RTX)
46681 init_vselect_insn ();
46683 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46684 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46685 for (i = 0; i < nelt; ++i)
46686 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46687 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46688 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46689 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46690 SET_DEST (PATTERN (vselect_insn)) = target;
46691 icode = recog_memoized (vselect_insn);
46693 if (icode >= 0 && !testing_p)
46694 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46696 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46697 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46698 INSN_CODE (vselect_insn) = -1;
46700 return icode >= 0;
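/* For illustration (not in the original source): expand_vselect rewrites
   the cached insn in place (operand, selector length and modes), asks
   recog_memoized whether the resulting pattern matches an insn available
   in the active ISA, emits a copy of the pattern only when not merely
   testing, and finally restores the placeholder operands and resets
   INSN_CODE so the next query starts from a clean slate.  */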
46703 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46705 static bool
46706 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46707 const unsigned char *perm, unsigned nelt,
46708 bool testing_p)
46710 machine_mode v2mode;
46711 rtx x;
46712 bool ok;
46714 if (vselect_insn == NULL_RTX)
46715 init_vselect_insn ();
46717 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46718 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46719 PUT_MODE (x, v2mode);
46720 XEXP (x, 0) = op0;
46721 XEXP (x, 1) = op1;
46722 ok = expand_vselect (target, x, perm, nelt, testing_p);
46723 XEXP (x, 0) = const0_rtx;
46724 XEXP (x, 1) = const0_rtx;
46725 return ok;
46728 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46729 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
46731 static bool
46732 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46734 machine_mode vmode = d->vmode;
46735 unsigned i, mask, nelt = d->nelt;
46736 rtx target, op0, op1, x;
46737 rtx rperm[32], vperm;
46739 if (d->one_operand_p)
46740 return false;
46741 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46742 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46744 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46746 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46748 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46750 else
46751 return false;
46753 /* This is a blend, not a permute. Elements must stay in their
46754 respective lanes. */
46755 for (i = 0; i < nelt; ++i)
46757 unsigned e = d->perm[i];
46758 if (!(e == i || e == i + nelt))
46759 return false;
46762 if (d->testing_p)
46763 return true;
46765 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46766 decision should be extracted elsewhere, so that we only try that
46767 sequence once all budget==3 options have been tried. */
46768 target = d->target;
46769 op0 = d->op0;
46770 op1 = d->op1;
46771 mask = 0;
46773 switch (vmode)
46775 case V8DFmode:
46776 case V16SFmode:
46777 case V4DFmode:
46778 case V8SFmode:
46779 case V2DFmode:
46780 case V4SFmode:
46781 case V8HImode:
46782 case V8SImode:
46783 case V32HImode:
46784 case V64QImode:
46785 case V16SImode:
46786 case V8DImode:
46787 for (i = 0; i < nelt; ++i)
46788 mask |= (d->perm[i] >= nelt) << i;
46789 break;
46791 case V2DImode:
46792 for (i = 0; i < 2; ++i)
46793 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46794 vmode = V8HImode;
46795 goto do_subreg;
46797 case V4SImode:
46798 for (i = 0; i < 4; ++i)
46799 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46800 vmode = V8HImode;
46801 goto do_subreg;
46803 case V16QImode:
46804 /* See if bytes move in pairs so we can use pblendw with
46805 an immediate argument, rather than pblendvb with a vector
46806 argument. */
46807 for (i = 0; i < 16; i += 2)
46808 if (d->perm[i] + 1 != d->perm[i + 1])
46810 use_pblendvb:
46811 for (i = 0; i < nelt; ++i)
46812 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46814 finish_pblendvb:
46815 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46816 vperm = force_reg (vmode, vperm);
46818 if (GET_MODE_SIZE (vmode) == 16)
46819 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46820 else
46821 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46822 if (target != d->target)
46823 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46824 return true;
46827 for (i = 0; i < 8; ++i)
46828 mask |= (d->perm[i * 2] >= 16) << i;
46829 vmode = V8HImode;
46830 /* FALLTHRU */
46832 do_subreg:
46833 target = gen_reg_rtx (vmode);
46834 op0 = gen_lowpart (vmode, op0);
46835 op1 = gen_lowpart (vmode, op1);
46836 break;
46838 case V32QImode:
46839 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46840 for (i = 0; i < 32; i += 2)
46841 if (d->perm[i] + 1 != d->perm[i + 1])
46842 goto use_pblendvb;
46843 /* See if bytes move in quadruplets. If yes, vpblendd
46844 with immediate can be used. */
46845 for (i = 0; i < 32; i += 4)
46846 if (d->perm[i] + 2 != d->perm[i + 2])
46847 break;
46848 if (i < 32)
46850 /* See if bytes move the same in both lanes. If yes,
46851 vpblendw with immediate can be used. */
46852 for (i = 0; i < 16; i += 2)
46853 if (d->perm[i] + 16 != d->perm[i + 16])
46854 goto use_pblendvb;
46856 /* Use vpblendw. */
46857 for (i = 0; i < 16; ++i)
46858 mask |= (d->perm[i * 2] >= 32) << i;
46859 vmode = V16HImode;
46860 goto do_subreg;
46863 /* Use vpblendd. */
46864 for (i = 0; i < 8; ++i)
46865 mask |= (d->perm[i * 4] >= 32) << i;
46866 vmode = V8SImode;
46867 goto do_subreg;
46869 case V16HImode:
46870 /* See if words move in pairs. If yes, vpblendd can be used. */
46871 for (i = 0; i < 16; i += 2)
46872 if (d->perm[i] + 1 != d->perm[i + 1])
46873 break;
46874 if (i < 16)
46876 /* See if words move the same in both lanes. If not,
46877 vpblendvb must be used. */
46878 for (i = 0; i < 8; i++)
46879 if (d->perm[i] + 8 != d->perm[i + 8])
46881 /* Use vpblendvb. */
46882 for (i = 0; i < 32; ++i)
46883 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46885 vmode = V32QImode;
46886 nelt = 32;
46887 target = gen_reg_rtx (vmode);
46888 op0 = gen_lowpart (vmode, op0);
46889 op1 = gen_lowpart (vmode, op1);
46890 goto finish_pblendvb;
46893 /* Use vpblendw. */
46894 for (i = 0; i < 16; ++i)
46895 mask |= (d->perm[i] >= 16) << i;
46896 break;
46899 /* Use vpblendd. */
46900 for (i = 0; i < 8; ++i)
46901 mask |= (d->perm[i * 2] >= 16) << i;
46902 vmode = V8SImode;
46903 goto do_subreg;
46905 case V4DImode:
46906 /* Use vpblendd. */
46907 for (i = 0; i < 4; ++i)
46908 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46909 vmode = V8SImode;
46910 goto do_subreg;
46912 default:
46913 gcc_unreachable ();
46916 /* This matches five different patterns with the different modes. */
46917 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
46918 x = gen_rtx_SET (target, x);
46919 emit_insn (x);
46920 if (target != d->target)
46921 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46923 return true;
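/* For illustration (not in the original source): for a V4SImode blend with
   perm {0 5 2 7}, elements 1 and 3 come from the second operand, so the
   V4SImode case above builds the V8HImode word mask 0xcc (two mask bits
   per dword) and a single pblendw on the V8HImode views of the operands
   implements the whole permutation.  */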
46926 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46927 in terms of the variable form of vpermilps.
46929 Note that we will have already failed the immediate input vpermilps,
46930 which requires that the high and low part shuffle be identical; the
46931 variable form doesn't require that. */
46933 static bool
46934 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
46936 rtx rperm[8], vperm;
46937 unsigned i;
46939 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
46940 return false;
46942 /* We can only permute within the 128-bit lane. */
46943 for (i = 0; i < 8; ++i)
46945 unsigned e = d->perm[i];
46946 if (i < 4 ? e >= 4 : e < 4)
46947 return false;
46950 if (d->testing_p)
46951 return true;
46953 for (i = 0; i < 8; ++i)
46955 unsigned e = d->perm[i];
46957 /* Within each 128-bit lane, the elements of op0 are numbered
46958 from 0 and the elements of op1 are numbered from 4. */
46959 if (e >= 8 + 4)
46960 e -= 8;
46961 else if (e >= 4)
46962 e -= 4;
46964 rperm[i] = GEN_INT (e);
46967 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
46968 vperm = force_reg (V8SImode, vperm);
46969 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
46971 return true;
46974 /* Return true if permutation D can be performed as VMODE permutation
46975 instead. */
46977 static bool
46978 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
46980 unsigned int i, j, chunk;
46982 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
46983 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
46984 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
46985 return false;
46987 if (GET_MODE_NUNITS (vmode) >= d->nelt)
46988 return true;
46990 chunk = d->nelt / GET_MODE_NUNITS (vmode);
46991 for (i = 0; i < d->nelt; i += chunk)
46992 if (d->perm[i] & (chunk - 1))
46993 return false;
46994 else
46995 for (j = 1; j < chunk; ++j)
46996 if (d->perm[i] + j != d->perm[i + j])
46997 return false;
46999 return true;
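/* For illustration (not in the original source): the V16QImode permutation
   {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11} moves bytes in aligned groups of
   four, so it is also expressible as the V4SImode permutation {1 0 3 2}
   and passes the check above with chunk == 4.  */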
47002 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47003 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47005 static bool
47006 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47008 unsigned i, nelt, eltsz, mask;
47009 unsigned char perm[64];
47010 machine_mode vmode = V16QImode;
47011 rtx rperm[64], vperm, target, op0, op1;
47013 nelt = d->nelt;
47015 if (!d->one_operand_p)
47017 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47019 if (TARGET_AVX2
47020 && valid_perm_using_mode_p (V2TImode, d))
47022 if (d->testing_p)
47023 return true;
47025 /* Use vperm2i128 insn. The pattern uses
47026 V4DImode instead of V2TImode. */
47027 target = d->target;
47028 if (d->vmode != V4DImode)
47029 target = gen_reg_rtx (V4DImode);
47030 op0 = gen_lowpart (V4DImode, d->op0);
47031 op1 = gen_lowpart (V4DImode, d->op1);
47032 rperm[0]
47033 = GEN_INT ((d->perm[0] / (nelt / 2))
47034 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47035 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47036 if (target != d->target)
47037 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47038 return true;
47040 return false;
47043 else
47045 if (GET_MODE_SIZE (d->vmode) == 16)
47047 if (!TARGET_SSSE3)
47048 return false;
47050 else if (GET_MODE_SIZE (d->vmode) == 32)
47052 if (!TARGET_AVX2)
47053 return false;
47055 /* V4DImode should be already handled through
47056 expand_vselect by vpermq instruction. */
47057 gcc_assert (d->vmode != V4DImode);
47059 vmode = V32QImode;
47060 if (d->vmode == V8SImode
47061 || d->vmode == V16HImode
47062 || d->vmode == V32QImode)
47064 /* First see if vpermq can be used for
47065 V8SImode/V16HImode/V32QImode. */
47066 if (valid_perm_using_mode_p (V4DImode, d))
47068 for (i = 0; i < 4; i++)
47069 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47070 if (d->testing_p)
47071 return true;
47072 target = gen_reg_rtx (V4DImode);
47073 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47074 perm, 4, false))
47076 emit_move_insn (d->target,
47077 gen_lowpart (d->vmode, target));
47078 return true;
47080 return false;
47083 /* Next see if vpermd can be used. */
47084 if (valid_perm_using_mode_p (V8SImode, d))
47085 vmode = V8SImode;
47087 /* Or if vpermps can be used. */
47088 else if (d->vmode == V8SFmode)
47089 vmode = V8SImode;
47091 if (vmode == V32QImode)
47093 /* vpshufb only works intra lanes; it is not
47094 possible to shuffle bytes between the lanes. */
47095 for (i = 0; i < nelt; ++i)
47096 if ((d->perm[i] ^ i) & (nelt / 2))
47097 return false;
47100 else if (GET_MODE_SIZE (d->vmode) == 64)
47102 if (!TARGET_AVX512BW)
47103 return false;
47105 /* If vpermq didn't work, vpshufb won't work either. */
47106 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47107 return false;
47109 vmode = V64QImode;
47110 if (d->vmode == V16SImode
47111 || d->vmode == V32HImode
47112 || d->vmode == V64QImode)
47114 /* First see if vpermq can be used for
47115 V16SImode/V32HImode/V64QImode. */
47116 if (valid_perm_using_mode_p (V8DImode, d))
47118 for (i = 0; i < 8; i++)
47119 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47120 if (d->testing_p)
47121 return true;
47122 target = gen_reg_rtx (V8DImode);
47123 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47124 perm, 8, false))
47126 emit_move_insn (d->target,
47127 gen_lowpart (d->vmode, target));
47128 return true;
47130 return false;
47133 /* Next see if vpermd can be used. */
47134 if (valid_perm_using_mode_p (V16SImode, d))
47135 vmode = V16SImode;
47137 /* Or if vpermps can be used. */
47138 else if (d->vmode == V16SFmode)
47139 vmode = V16SImode;
47140 if (vmode == V64QImode)
47143 /* vpshufb only works intra lanes; it is not
47144 possible to shuffle bytes between the lanes. */
47144 for (i = 0; i < nelt; ++i)
47145 if ((d->perm[i] ^ i) & (nelt / 4))
47146 return false;
47149 else
47150 return false;
47153 if (d->testing_p)
47154 return true;
47156 if (vmode == V8SImode)
47157 for (i = 0; i < 8; ++i)
47158 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47159 else if (vmode == V16SImode)
47160 for (i = 0; i < 16; ++i)
47161 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47162 else
47164 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47165 if (!d->one_operand_p)
47166 mask = 2 * nelt - 1;
47167 else if (vmode == V16QImode)
47168 mask = nelt - 1;
47169 else if (vmode == V64QImode)
47170 mask = nelt / 4 - 1;
47171 else
47172 mask = nelt / 2 - 1;
47174 for (i = 0; i < nelt; ++i)
47176 unsigned j, e = d->perm[i] & mask;
47177 for (j = 0; j < eltsz; ++j)
47178 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47182 vperm = gen_rtx_CONST_VECTOR (vmode,
47183 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47184 vperm = force_reg (vmode, vperm);
47186 target = d->target;
47187 if (d->vmode != vmode)
47188 target = gen_reg_rtx (vmode);
47189 op0 = gen_lowpart (vmode, d->op0);
47190 if (d->one_operand_p)
47192 if (vmode == V16QImode)
47193 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47194 else if (vmode == V32QImode)
47195 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47196 else if (vmode == V64QImode)
47197 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47198 else if (vmode == V8SFmode)
47199 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47200 else if (vmode == V8SImode)
47201 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47202 else if (vmode == V16SFmode)
47203 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47204 else if (vmode == V16SImode)
47205 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47206 else
47207 gcc_unreachable ();
47209 else
47211 op1 = gen_lowpart (vmode, d->op1);
47212 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47214 if (target != d->target)
47215 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47217 return true;
47220 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47221 in a single instruction. */
47223 static bool
47224 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47226 unsigned i, nelt = d->nelt;
47227 unsigned char perm2[MAX_VECT_LEN];
47229 /* Check plain VEC_SELECT first, because AVX has instructions that could
47230 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47231 input where SEL+CONCAT may not. */
47232 if (d->one_operand_p)
47234 int mask = nelt - 1;
47235 bool identity_perm = true;
47236 bool broadcast_perm = true;
47238 for (i = 0; i < nelt; i++)
47240 perm2[i] = d->perm[i] & mask;
47241 if (perm2[i] != i)
47242 identity_perm = false;
47243 if (perm2[i])
47244 broadcast_perm = false;
47247 if (identity_perm)
47249 if (!d->testing_p)
47250 emit_move_insn (d->target, d->op0);
47251 return true;
47253 else if (broadcast_perm && TARGET_AVX2)
47255 /* Use vpbroadcast{b,w,d}. */
47256 rtx (*gen) (rtx, rtx) = NULL;
47257 switch (d->vmode)
47259 case V64QImode:
47260 if (TARGET_AVX512BW)
47261 gen = gen_avx512bw_vec_dupv64qi_1;
47262 break;
47263 case V32QImode:
47264 gen = gen_avx2_pbroadcastv32qi_1;
47265 break;
47266 case V32HImode:
47267 if (TARGET_AVX512BW)
47268 gen = gen_avx512bw_vec_dupv32hi_1;
47269 break;
47270 case V16HImode:
47271 gen = gen_avx2_pbroadcastv16hi_1;
47272 break;
47273 case V16SImode:
47274 if (TARGET_AVX512F)
47275 gen = gen_avx512f_vec_dupv16si_1;
47276 break;
47277 case V8SImode:
47278 gen = gen_avx2_pbroadcastv8si_1;
47279 break;
47280 case V16QImode:
47281 gen = gen_avx2_pbroadcastv16qi;
47282 break;
47283 case V8HImode:
47284 gen = gen_avx2_pbroadcastv8hi;
47285 break;
47286 case V16SFmode:
47287 if (TARGET_AVX512F)
47288 gen = gen_avx512f_vec_dupv16sf_1;
47289 break;
47290 case V8SFmode:
47291 gen = gen_avx2_vec_dupv8sf_1;
47292 break;
47293 case V8DFmode:
47294 if (TARGET_AVX512F)
47295 gen = gen_avx512f_vec_dupv8df_1;
47296 break;
47297 case V8DImode:
47298 if (TARGET_AVX512F)
47299 gen = gen_avx512f_vec_dupv8di_1;
47300 break;
47301 /* For other modes prefer other shuffles this function creates. */
47302 default: break;
47304 if (gen != NULL)
47306 if (!d->testing_p)
47307 emit_insn (gen (d->target, d->op0));
47308 return true;
47312 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47313 return true;
47315 /* There are plenty of patterns in sse.md that are written for
47316 SEL+CONCAT and are not replicated for a single op. Perhaps
47317 that should be changed, to avoid the nastiness here. */
47319 /* Recognize interleave style patterns, which means incrementing
47320 every other permutation operand. */
47321 for (i = 0; i < nelt; i += 2)
47323 perm2[i] = d->perm[i] & mask;
47324 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47326 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47327 d->testing_p))
47328 return true;
47330 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47331 if (nelt >= 4)
47333 for (i = 0; i < nelt; i += 4)
47335 perm2[i + 0] = d->perm[i + 0] & mask;
47336 perm2[i + 1] = d->perm[i + 1] & mask;
47337 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47338 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47341 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47342 d->testing_p))
47343 return true;
47347 /* Finally, try the fully general two operand permute. */
47348 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47349 d->testing_p))
47350 return true;
47352 /* Recognize interleave style patterns with reversed operands. */
47353 if (!d->one_operand_p)
47355 for (i = 0; i < nelt; ++i)
47357 unsigned e = d->perm[i];
47358 if (e >= nelt)
47359 e -= nelt;
47360 else
47361 e += nelt;
47362 perm2[i] = e;
47365 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47366 d->testing_p))
47367 return true;
47370 /* Try the SSE4.1 blend variable merge instructions. */
47371 if (expand_vec_perm_blend (d))
47372 return true;
47374 /* Try one of the AVX vpermil variable permutations. */
47375 if (expand_vec_perm_vpermil (d))
47376 return true;
47378 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47379 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47380 if (expand_vec_perm_pshufb (d))
47381 return true;
47383 /* Try the AVX2 vpalignr instruction. */
47384 if (expand_vec_perm_palignr (d, true))
47385 return true;
47387 /* Try the AVX512F vpermi2 instructions. */
47388 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47389 return true;
47391 return false;
47394 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47395 in terms of a pair of pshuflw + pshufhw instructions. */
47397 static bool
47398 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47400 unsigned char perm2[MAX_VECT_LEN];
47401 unsigned i;
47402 bool ok;
47404 if (d->vmode != V8HImode || !d->one_operand_p)
47405 return false;
47407 /* The two permutations only operate in 64-bit lanes. */
47408 for (i = 0; i < 4; ++i)
47409 if (d->perm[i] >= 4)
47410 return false;
47411 for (i = 4; i < 8; ++i)
47412 if (d->perm[i] < 4)
47413 return false;
47415 if (d->testing_p)
47416 return true;
47418 /* Emit the pshuflw. */
47419 memcpy (perm2, d->perm, 4);
47420 for (i = 4; i < 8; ++i)
47421 perm2[i] = i;
47422 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47423 gcc_assert (ok);
47425 /* Emit the pshufhw. */
47426 memcpy (perm2 + 4, d->perm + 4, 4);
47427 for (i = 0; i < 4; ++i)
47428 perm2[i] = i;
47429 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47430 gcc_assert (ok);
47432 return true;
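/* For illustration (not in the original source): the V8HImode permutation
   {3 2 1 0 5 4 7 6} is handled as pshuflw with selector {3 2 1 0 4 5 6 7}
   (reverse the low quadword, keep the high one) followed by pshufhw with
   selector {0 1 2 3 5 4 7 6} (keep the low quadword, swap word pairs in
   the high one).  */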
47435 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47436 the permutation using the SSSE3 palignr instruction. This succeeds
47437 when all of the elements in PERM fit within one vector and we merely
47438 need to shift them down so that a single vector permutation has a
47439 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47440 the vpalignr instruction itself can perform the requested permutation. */
47442 static bool
47443 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47445 unsigned i, nelt = d->nelt;
47446 unsigned min, max, minswap, maxswap;
47447 bool in_order, ok, swap = false;
47448 rtx shift, target;
47449 struct expand_vec_perm_d dcopy;
47451 /* Even with AVX, palignr only operates on 128-bit vectors;
47452 with AVX2, palignr operates on both 128-bit lanes. */
47453 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47454 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47455 return false;
47457 min = 2 * nelt;
47458 max = 0;
47459 minswap = 2 * nelt;
47460 maxswap = 0;
47461 for (i = 0; i < nelt; ++i)
47463 unsigned e = d->perm[i];
47464 unsigned eswap = d->perm[i] ^ nelt;
47465 if (GET_MODE_SIZE (d->vmode) == 32)
47467 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47468 eswap = e ^ (nelt / 2);
47470 if (e < min)
47471 min = e;
47472 if (e > max)
47473 max = e;
47474 if (eswap < minswap)
47475 minswap = eswap;
47476 if (eswap > maxswap)
47477 maxswap = eswap;
47479 if (min == 0
47480 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47482 if (d->one_operand_p
47483 || minswap == 0
47484 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47485 ? nelt / 2 : nelt))
47486 return false;
47487 swap = true;
47488 min = minswap;
47489 max = maxswap;
47492 /* Given that we have SSSE3, we know we'll be able to implement the
47493 single operand permutation after the palignr with pshufb for
47494 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47495 first. */
47496 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47497 return true;
47499 dcopy = *d;
47500 if (swap)
47502 dcopy.op0 = d->op1;
47503 dcopy.op1 = d->op0;
47504 for (i = 0; i < nelt; ++i)
47505 dcopy.perm[i] ^= nelt;
47508 in_order = true;
47509 for (i = 0; i < nelt; ++i)
47511 unsigned e = dcopy.perm[i];
47512 if (GET_MODE_SIZE (d->vmode) == 32
47513 && e >= nelt
47514 && (e & (nelt / 2 - 1)) < min)
47515 e = e - min - (nelt / 2);
47516 else
47517 e = e - min;
47518 if (e != i)
47519 in_order = false;
47520 dcopy.perm[i] = e;
47522 dcopy.one_operand_p = true;
47524 if (single_insn_only_p && !in_order)
47525 return false;
47527 /* For AVX2, test whether we can permute the result in one instruction. */
47528 if (d->testing_p)
47530 if (in_order)
47531 return true;
47532 dcopy.op1 = dcopy.op0;
47533 return expand_vec_perm_1 (&dcopy);
47536 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47537 if (GET_MODE_SIZE (d->vmode) == 16)
47539 target = gen_reg_rtx (TImode);
47540 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47541 gen_lowpart (TImode, dcopy.op0), shift));
47543 else
47545 target = gen_reg_rtx (V2TImode);
47546 emit_insn (gen_avx2_palignrv2ti (target,
47547 gen_lowpart (V2TImode, dcopy.op1),
47548 gen_lowpart (V2TImode, dcopy.op0),
47549 shift));
47552 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47554 /* Test for the degenerate case where the alignment by itself
47555 produces the desired permutation. */
47556 if (in_order)
47558 emit_move_insn (d->target, dcopy.op0);
47559 return true;
47562 ok = expand_vec_perm_1 (&dcopy);
47563 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47565 return ok;
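/* For illustration (not in the original source): the two operand V4SImode
   permutation {2 3 4 5} has min == 2, so a single palignr shifting the
   concatenated operands right by two elements (8 bytes) already places
   every element where it belongs; the remaining permutation is the
   identity and the in_order shortcut above just moves the result into
   place.  */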
47568 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47569 the permutation using the SSE4_1 pblendv instruction. Potentially
47570 reduces a permutation from 2 pshufbs and an ior to 1 pshufb and a pblendv. */
47572 static bool
47573 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47575 unsigned i, which, nelt = d->nelt;
47576 struct expand_vec_perm_d dcopy, dcopy1;
47577 machine_mode vmode = d->vmode;
47578 bool ok;
47580 /* Use the same checks as in expand_vec_perm_blend. */
47581 if (d->one_operand_p)
47582 return false;
47583 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47585 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47587 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47589 else
47590 return false;
47592 /* Figure out which permutation elements do not stay in their
47593 respective lanes. */
47594 for (i = 0, which = 0; i < nelt; ++i)
47596 unsigned e = d->perm[i];
47597 if (e != i)
47598 which |= (e < nelt ? 1 : 2);
47600 /* We can pblend the part where elements do not stay in their
47601 respective lanes only when these elements are all in one
47602 half of the permutation.
47603 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47604 lanes, but both are >= 8.
47605 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47606 respective lanes, and 8 >= 8 but 2 < 8. */
47607 if (which != 1 && which != 2)
47608 return false;
47609 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47610 return true;
47612 /* First we apply a one-operand permutation to the part whose
47613 elements do not stay in their respective lanes. */
47614 dcopy = *d;
47615 if (which == 2)
47616 dcopy.op0 = dcopy.op1 = d->op1;
47617 else
47618 dcopy.op0 = dcopy.op1 = d->op0;
47619 if (!d->testing_p)
47620 dcopy.target = gen_reg_rtx (vmode);
47621 dcopy.one_operand_p = true;
47623 for (i = 0; i < nelt; ++i)
47624 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47626 ok = expand_vec_perm_1 (&dcopy);
47627 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47628 return false;
47629 else
47630 gcc_assert (ok);
47631 if (d->testing_p)
47632 return true;
47634 /* Next we put permuted elements into their positions. */
47635 dcopy1 = *d;
47636 if (which == 2)
47637 dcopy1.op1 = dcopy.target;
47638 else
47639 dcopy1.op0 = dcopy.target;
47641 for (i = 0; i < nelt; ++i)
47642 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47644 ok = expand_vec_perm_blend (&dcopy1);
47645 gcc_assert (ok);
47647 return true;
47650 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47652 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47653 a two vector permutation into a single vector permutation by using
47654 an interleave operation to merge the vectors. */
47656 static bool
47657 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47659 struct expand_vec_perm_d dremap, dfinal;
47660 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47661 unsigned HOST_WIDE_INT contents;
47662 unsigned char remap[2 * MAX_VECT_LEN];
47663 rtx_insn *seq;
47664 bool ok, same_halves = false;
47666 if (GET_MODE_SIZE (d->vmode) == 16)
47668 if (d->one_operand_p)
47669 return false;
47671 else if (GET_MODE_SIZE (d->vmode) == 32)
47673 if (!TARGET_AVX)
47674 return false;
47675 /* For 32-byte modes allow even d->one_operand_p.
47676 The lack of cross-lane shuffling in some instructions
47677 might prevent a single insn shuffle. */
47678 dfinal = *d;
47679 dfinal.testing_p = true;
47680 /* If expand_vec_perm_interleave3 can expand this into
47681 a 3 insn sequence, give up and let it be expanded as
47682 a 3 insn sequence. While that is one insn longer,
47683 it doesn't need a memory operand, and in the common
47684 case where the interleave low and interleave high
47685 permutations with the same operands are adjacent, it
47686 needs only 4 insns for both after CSE. */
47687 if (expand_vec_perm_interleave3 (&dfinal))
47688 return false;
47690 else
47691 return false;
47693 /* Examine from whence the elements come. */
47694 contents = 0;
47695 for (i = 0; i < nelt; ++i)
47696 contents |= HOST_WIDE_INT_1U << d->perm[i];
47698 memset (remap, 0xff, sizeof (remap));
47699 dremap = *d;
47701 if (GET_MODE_SIZE (d->vmode) == 16)
47703 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47705 /* Split the two input vectors into 4 halves. */
47706 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
47707 h2 = h1 << nelt2;
47708 h3 = h2 << nelt2;
47709 h4 = h3 << nelt2;
47711 /* If the elements all come from the low halves, use interleave low,
47712 and similarly interleave high. If the elements come from mis-matched
47713 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47714 if ((contents & (h1 | h3)) == contents)
47716 /* punpckl* */
47717 for (i = 0; i < nelt2; ++i)
47719 remap[i] = i * 2;
47720 remap[i + nelt] = i * 2 + 1;
47721 dremap.perm[i * 2] = i;
47722 dremap.perm[i * 2 + 1] = i + nelt;
47724 if (!TARGET_SSE2 && d->vmode == V4SImode)
47725 dremap.vmode = V4SFmode;
47727 else if ((contents & (h2 | h4)) == contents)
47729 /* punpckh* */
47730 for (i = 0; i < nelt2; ++i)
47732 remap[i + nelt2] = i * 2;
47733 remap[i + nelt + nelt2] = i * 2 + 1;
47734 dremap.perm[i * 2] = i + nelt2;
47735 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47737 if (!TARGET_SSE2 && d->vmode == V4SImode)
47738 dremap.vmode = V4SFmode;
47740 else if ((contents & (h1 | h4)) == contents)
47742 /* shufps */
47743 for (i = 0; i < nelt2; ++i)
47745 remap[i] = i;
47746 remap[i + nelt + nelt2] = i + nelt2;
47747 dremap.perm[i] = i;
47748 dremap.perm[i + nelt2] = i + nelt + nelt2;
47750 if (nelt != 4)
47752 /* shufpd */
47753 dremap.vmode = V2DImode;
47754 dremap.nelt = 2;
47755 dremap.perm[0] = 0;
47756 dremap.perm[1] = 3;
47759 else if ((contents & (h2 | h3)) == contents)
47761 /* shufps */
47762 for (i = 0; i < nelt2; ++i)
47764 remap[i + nelt2] = i;
47765 remap[i + nelt] = i + nelt2;
47766 dremap.perm[i] = i + nelt2;
47767 dremap.perm[i + nelt2] = i + nelt;
47769 if (nelt != 4)
47771 /* shufpd */
47772 dremap.vmode = V2DImode;
47773 dremap.nelt = 2;
47774 dremap.perm[0] = 1;
47775 dremap.perm[1] = 2;
47778 else
47779 return false;
47781 else
47783 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47784 unsigned HOST_WIDE_INT q[8];
47785 unsigned int nonzero_halves[4];
47787 /* Split the two input vectors into 8 quarters. */
47788 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
47789 for (i = 1; i < 8; ++i)
47790 q[i] = q[0] << (nelt4 * i);
47791 for (i = 0; i < 4; ++i)
47792 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47794 nonzero_halves[nzcnt] = i;
47795 ++nzcnt;
47798 if (nzcnt == 1)
47800 gcc_assert (d->one_operand_p);
47801 nonzero_halves[1] = nonzero_halves[0];
47802 same_halves = true;
47804 else if (d->one_operand_p)
47806 gcc_assert (nonzero_halves[0] == 0);
47807 gcc_assert (nonzero_halves[1] == 1);
47810 if (nzcnt <= 2)
47812 if (d->perm[0] / nelt2 == nonzero_halves[1])
47814 /* Attempt to increase the likelihood that dfinal
47815 shuffle will be intra-lane. */
47816 char tmph = nonzero_halves[0];
47817 nonzero_halves[0] = nonzero_halves[1];
47818 nonzero_halves[1] = tmph;
47821 /* vperm2f128 or vperm2i128. */
47822 for (i = 0; i < nelt2; ++i)
47824 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47825 remap[i + nonzero_halves[0] * nelt2] = i;
47826 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47827 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47830 if (d->vmode != V8SFmode
47831 && d->vmode != V4DFmode
47832 && d->vmode != V8SImode)
47834 dremap.vmode = V8SImode;
47835 dremap.nelt = 8;
47836 for (i = 0; i < 4; ++i)
47838 dremap.perm[i] = i + nonzero_halves[0] * 4;
47839 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47843 else if (d->one_operand_p)
47844 return false;
47845 else if (TARGET_AVX2
47846 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
47848 /* vpunpckl* */
47849 for (i = 0; i < nelt4; ++i)
47851 remap[i] = i * 2;
47852 remap[i + nelt] = i * 2 + 1;
47853 remap[i + nelt2] = i * 2 + nelt2;
47854 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47855 dremap.perm[i * 2] = i;
47856 dremap.perm[i * 2 + 1] = i + nelt;
47857 dremap.perm[i * 2 + nelt2] = i + nelt2;
47858 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47861 else if (TARGET_AVX2
47862 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
47864 /* vpunpckh* */
47865 for (i = 0; i < nelt4; ++i)
47867 remap[i + nelt4] = i * 2;
47868 remap[i + nelt + nelt4] = i * 2 + 1;
47869 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47870 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47871 dremap.perm[i * 2] = i + nelt4;
47872 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47873 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47874 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47877 else
47878 return false;
47881 /* Use the remapping array set up above to move the elements from their
47882 swizzled locations into their final destinations. */
47883 dfinal = *d;
47884 for (i = 0; i < nelt; ++i)
47886 unsigned e = remap[d->perm[i]];
47887 gcc_assert (e < nelt);
47888 /* If same_halves is true, both halves of the remapped vector are the
47889 same. Avoid cross-lane accesses if possible. */
47890 if (same_halves && i >= nelt2)
47892 gcc_assert (e < nelt2);
47893 dfinal.perm[i] = e + nelt2;
47895 else
47896 dfinal.perm[i] = e;
47898 if (!d->testing_p)
47900 dremap.target = gen_reg_rtx (dremap.vmode);
47901 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47903 dfinal.op1 = dfinal.op0;
47904 dfinal.one_operand_p = true;
47906 /* Test if the final remap can be done with a single insn. For V4SFmode or
47907 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
47908 start_sequence ();
47909 ok = expand_vec_perm_1 (&dfinal);
47910 seq = get_insns ();
47911 end_sequence ();
47913 if (!ok)
47914 return false;
47916 if (d->testing_p)
47917 return true;
47919 if (dremap.vmode != dfinal.vmode)
47921 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
47922 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
47925 ok = expand_vec_perm_1 (&dremap);
47926 gcc_assert (ok);
47928 emit_insn (seq);
47929 return true;
47932 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47933 a single vector cross-lane permutation into vpermq followed
47934 by any of the single insn permutations. */
47936 static bool
47937 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
47939 struct expand_vec_perm_d dremap, dfinal;
47940 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
47941 unsigned contents[2];
47942 bool ok;
47944 if (!(TARGET_AVX2
47945 && (d->vmode == V32QImode || d->vmode == V16HImode)
47946 && d->one_operand_p))
47947 return false;
47949 contents[0] = 0;
47950 contents[1] = 0;
47951 for (i = 0; i < nelt2; ++i)
47953 contents[0] |= 1u << (d->perm[i] / nelt4);
47954 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
47957 for (i = 0; i < 2; ++i)
47959 unsigned int cnt = 0;
47960 for (j = 0; j < 4; ++j)
47961 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
47962 return false;
47965 if (d->testing_p)
47966 return true;
47968 dremap = *d;
47969 dremap.vmode = V4DImode;
47970 dremap.nelt = 4;
47971 dremap.target = gen_reg_rtx (V4DImode);
47972 dremap.op0 = gen_lowpart (V4DImode, d->op0);
47973 dremap.op1 = dremap.op0;
47974 dremap.one_operand_p = true;
47975 for (i = 0; i < 2; ++i)
47977 unsigned int cnt = 0;
47978 for (j = 0; j < 4; ++j)
47979 if ((contents[i] & (1u << j)) != 0)
47980 dremap.perm[2 * i + cnt++] = j;
47981 for (; cnt < 2; ++cnt)
47982 dremap.perm[2 * i + cnt] = 0;
47985 dfinal = *d;
47986 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
47987 dfinal.op1 = dfinal.op0;
47988 dfinal.one_operand_p = true;
47989 for (i = 0, j = 0; i < nelt; ++i)
47991 if (i == nelt2)
47992 j = 2;
47993 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
47994 if ((d->perm[i] / nelt4) == dremap.perm[j])
47996 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
47997 dfinal.perm[i] |= nelt4;
47998 else
47999 gcc_unreachable ();
48002 ok = expand_vec_perm_1 (&dremap);
48003 gcc_assert (ok);
48005 ok = expand_vec_perm_1 (&dfinal);
48006 gcc_assert (ok);
48008 return true;
48011 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48012 a vector permutation using two instructions, vperm2f128 resp.
48013 vperm2i128 followed by any single in-lane permutation. */
48015 static bool
48016 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48018 struct expand_vec_perm_d dfirst, dsecond;
48019 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48020 bool ok;
48022 if (!TARGET_AVX
48023 || GET_MODE_SIZE (d->vmode) != 32
48024 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48025 return false;
48027 dsecond = *d;
48028 dsecond.one_operand_p = false;
48029 dsecond.testing_p = true;
48031 /* ((perm << 2) | perm) & 0x33 is the vperm2[fi]128
48032 immediate. For perm < 16 the second permutation uses
48033 d->op0 as its first operand; for perm >= 16 it uses d->op1
48034 as its first operand. The second operand is the result of
48035 vperm2[fi]128. */
48036 for (perm = 0; perm < 32; perm++)
48038 /* Ignore permutations which do not move anything cross-lane. */
48039 if (perm < 16)
48041 /* The second shuffle for e.g. V4DFmode has
48042 0123 and ABCD operands.
48043 Ignore AB23, as 23 is already in the second lane
48044 of the first operand. */
48045 if ((perm & 0xc) == (1 << 2)) continue;
48046 /* And 01CD, as 01 is in the first lane of the first
48047 operand. */
48048 if ((perm & 3) == 0) continue;
48049 /* And 4567, as then the vperm2[fi]128 doesn't change
48050 anything on the original 4567 second operand. */
48051 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48053 else
48055 /* The second shuffle for e.g. V4DFmode has
48056 4567 and ABCD operands.
48057 Ignore AB67, as 67 is already in the second lane
48058 of the first operand. */
48059 if ((perm & 0xc) == (3 << 2)) continue;
48060 /* And 45CD, as 45 is in the first lane of the first
48061 operand. */
48062 if ((perm & 3) == 2) continue;
48063 /* And 0123, as then the vperm2[fi]128 doesn't change
48064 anything on the original 0123 first operand. */
48065 if ((perm & 0xf) == (1 << 2)) continue;
48068 for (i = 0; i < nelt; i++)
48070 j = d->perm[i] / nelt2;
48071 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48072 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48073 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48074 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48075 else
48076 break;
48079 if (i == nelt)
48081 start_sequence ();
48082 ok = expand_vec_perm_1 (&dsecond);
48083 end_sequence ();
48085 else
48086 ok = false;
48088 if (ok)
48090 if (d->testing_p)
48091 return true;
48093 /* Found a usable second shuffle. dfirst will be
48094 vperm2f128 on d->op0 and d->op1. */
48095 dsecond.testing_p = false;
48096 dfirst = *d;
48097 dfirst.target = gen_reg_rtx (d->vmode);
48098 for (i = 0; i < nelt; i++)
48099 dfirst.perm[i] = (i & (nelt2 - 1))
48100 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48102 canonicalize_perm (&dfirst);
48103 ok = expand_vec_perm_1 (&dfirst);
48104 gcc_assert (ok);
48106 /* And dsecond is some single insn shuffle, taking
48107 d->op0 and result of vperm2f128 (if perm < 16) or
48108 d->op1 and result of vperm2f128 (otherwise). */
48109 if (perm >= 16)
48110 dsecond.op0 = dsecond.op1;
48111 dsecond.op1 = dfirst.target;
48113 ok = expand_vec_perm_1 (&dsecond);
48114 gcc_assert (ok);
48116 return true;
48119 /* For one operand, the only useful vperm2f128 permutation is 0x01
48120 aka lanes swap. */
48121 if (d->one_operand_p)
48122 return false;
48125 return false;
48128 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48129 a two vector permutation using 2 intra-lane interleave insns
48130 and cross-lane shuffle for 32-byte vectors. */
48132 static bool
48133 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48135 unsigned i, nelt;
48136 rtx (*gen) (rtx, rtx, rtx);
48138 if (d->one_operand_p)
48139 return false;
48140 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48142 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48144 else
48145 return false;
48147 nelt = d->nelt;
48148 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48149 return false;
48150 for (i = 0; i < nelt; i += 2)
48151 if (d->perm[i] != d->perm[0] + i / 2
48152 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48153 return false;
48155 if (d->testing_p)
48156 return true;
48158 switch (d->vmode)
48160 case V32QImode:
48161 if (d->perm[0])
48162 gen = gen_vec_interleave_highv32qi;
48163 else
48164 gen = gen_vec_interleave_lowv32qi;
48165 break;
48166 case V16HImode:
48167 if (d->perm[0])
48168 gen = gen_vec_interleave_highv16hi;
48169 else
48170 gen = gen_vec_interleave_lowv16hi;
48171 break;
48172 case V8SImode:
48173 if (d->perm[0])
48174 gen = gen_vec_interleave_highv8si;
48175 else
48176 gen = gen_vec_interleave_lowv8si;
48177 break;
48178 case V4DImode:
48179 if (d->perm[0])
48180 gen = gen_vec_interleave_highv4di;
48181 else
48182 gen = gen_vec_interleave_lowv4di;
48183 break;
48184 case V8SFmode:
48185 if (d->perm[0])
48186 gen = gen_vec_interleave_highv8sf;
48187 else
48188 gen = gen_vec_interleave_lowv8sf;
48189 break;
48190 case V4DFmode:
48191 if (d->perm[0])
48192 gen = gen_vec_interleave_highv4df;
48193 else
48194 gen = gen_vec_interleave_lowv4df;
48195 break;
48196 default:
48197 gcc_unreachable ();
48200 emit_insn (gen (d->target, d->op0, d->op1));
48201 return true;
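/* For illustration (not in the original source): the two operand V8SImode
   permutation {0 8 1 9 2 10 3 11} satisfies the checks above with
   d->perm[0] == 0 and is emitted via gen_vec_interleave_lowv8si, while
   {4 12 5 13 6 14 7 15} starts at nelt / 2 and maps to
   gen_vec_interleave_highv8si.  */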
48204 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48205 a single vector permutation using a single intra-lane vector
48206 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48207 the non-swapped and swapped vectors together. */
48209 static bool
48210 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48212 struct expand_vec_perm_d dfirst, dsecond;
48213 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48214 rtx_insn *seq;
48215 bool ok;
48216 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48218 if (!TARGET_AVX
48219 || TARGET_AVX2
48220 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48221 || !d->one_operand_p)
48222 return false;
48224 dfirst = *d;
48225 for (i = 0; i < nelt; i++)
48226 dfirst.perm[i] = 0xff;
48227 for (i = 0, msk = 0; i < nelt; i++)
48229 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48230 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48231 return false;
48232 dfirst.perm[j] = d->perm[i];
48233 if (j != i)
48234 msk |= (1 << i);
48236 for (i = 0; i < nelt; i++)
48237 if (dfirst.perm[i] == 0xff)
48238 dfirst.perm[i] = i;
48240 if (!d->testing_p)
48241 dfirst.target = gen_reg_rtx (dfirst.vmode);
48243 start_sequence ();
48244 ok = expand_vec_perm_1 (&dfirst);
48245 seq = get_insns ();
48246 end_sequence ();
48248 if (!ok)
48249 return false;
48251 if (d->testing_p)
48252 return true;
48254 emit_insn (seq);
48256 dsecond = *d;
48257 dsecond.op0 = dfirst.target;
48258 dsecond.op1 = dfirst.target;
48259 dsecond.one_operand_p = true;
48260 dsecond.target = gen_reg_rtx (dsecond.vmode);
48261 for (i = 0; i < nelt; i++)
48262 dsecond.perm[i] = i ^ nelt2;
48264 ok = expand_vec_perm_1 (&dsecond);
48265 gcc_assert (ok);
48267 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48268 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48269 return true;
48272 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48273 permutation using two vperm2f128, followed by a vshufpd insn blending
48274 the two vectors together. */
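/* E.g. for a V4DFmode permutation { 1, 6, 3, 4 } this becomes
   dfirst.perm  = { 0, 1, 2, 3 }  (the element pairs containing 1 and 3),
   dsecond.perm = { 6, 7, 4, 5 }  (the element pairs containing 6 and 4),
   dthird.perm  = { 1, 4, 3, 6 }, which is a valid vshufpd selection
   picking the requested element out of each pair.  (Illustrative
   example only.)  */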
48276 static bool
48277 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48279 struct expand_vec_perm_d dfirst, dsecond, dthird;
48280 bool ok;
48282 if (!TARGET_AVX || (d->vmode != V4DFmode))
48283 return false;
48285 if (d->testing_p)
48286 return true;
48288 dfirst = *d;
48289 dsecond = *d;
48290 dthird = *d;
48292 dfirst.perm[0] = (d->perm[0] & ~1);
48293 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48294 dfirst.perm[2] = (d->perm[2] & ~1);
48295 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48296 dsecond.perm[0] = (d->perm[1] & ~1);
48297 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48298 dsecond.perm[2] = (d->perm[3] & ~1);
48299 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48300 dthird.perm[0] = (d->perm[0] % 2);
48301 dthird.perm[1] = (d->perm[1] % 2) + 4;
48302 dthird.perm[2] = (d->perm[2] % 2) + 2;
48303 dthird.perm[3] = (d->perm[3] % 2) + 6;
48305 dfirst.target = gen_reg_rtx (dfirst.vmode);
48306 dsecond.target = gen_reg_rtx (dsecond.vmode);
48307 dthird.op0 = dfirst.target;
48308 dthird.op1 = dsecond.target;
48309 dthird.one_operand_p = false;
48311 canonicalize_perm (&dfirst);
48312 canonicalize_perm (&dsecond);
48314 ok = expand_vec_perm_1 (&dfirst)
48315 && expand_vec_perm_1 (&dsecond)
48316 && expand_vec_perm_1 (&dthird);
48318 gcc_assert (ok);
48320 return true;
48323 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48324 permutation with two pshufb insns and an ior. We should have already
48325 failed all two instruction sequences. */
48327 static bool
48328 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48330 rtx rperm[2][16], vperm, l, h, op, m128;
48331 unsigned int i, nelt, eltsz;
48333 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48334 return false;
48335 gcc_assert (!d->one_operand_p);
48337 if (d->testing_p)
48338 return true;
48340 nelt = d->nelt;
48341 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48343 /* Generate two permutation masks. If the required element is within
48344 the given vector it is shuffled into the proper lane. If the required
48345 element is in the other vector, force a zero into the lane by setting
48346 bit 7 in the permutation mask. */
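/* E.g. for a V16QImode permutation with d->perm[0] = 3 and
   d->perm[1] = 21 (= 16 + 5), the op0 mask starts { 3, -128, ... } and
   the op1 mask starts { -128, 5, ... }; pshufb writes a zero wherever
   the mask byte has bit 7 set, so or-ing the two shuffled results
   merges the bytes taken from each operand.  (Illustrative example
   only.)  */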
48347 m128 = GEN_INT (-128);
48348 for (i = 0; i < nelt; ++i)
48350 unsigned j, e = d->perm[i];
48351 unsigned which = (e >= nelt);
48352 if (e >= nelt)
48353 e -= nelt;
48355 for (j = 0; j < eltsz; ++j)
48357 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48358 rperm[1-which][i*eltsz + j] = m128;
48362 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48363 vperm = force_reg (V16QImode, vperm);
48365 l = gen_reg_rtx (V16QImode);
48366 op = gen_lowpart (V16QImode, d->op0);
48367 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48369 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48370 vperm = force_reg (V16QImode, vperm);
48372 h = gen_reg_rtx (V16QImode);
48373 op = gen_lowpart (V16QImode, d->op1);
48374 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48376 op = d->target;
48377 if (d->vmode != V16QImode)
48378 op = gen_reg_rtx (V16QImode);
48379 emit_insn (gen_iorv16qi3 (op, l, h));
48380 if (op != d->target)
48381 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48383 return true;
48386 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
48387 with two vpshufb insns, vpermq and vpor. We should have already failed
48388 all two or three instruction sequences. */
48390 static bool
48391 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48393 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48394 unsigned int i, nelt, eltsz;
48396 if (!TARGET_AVX2
48397 || !d->one_operand_p
48398 || (d->vmode != V32QImode && d->vmode != V16HImode))
48399 return false;
48401 if (d->testing_p)
48402 return true;
48404 nelt = d->nelt;
48405 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48407 /* Generate two permutation masks. If the required element is within
48408 the same lane, it is shuffled in. If the required element is from the
48409 other lane, force a zero by setting bit 7 in the permutation mask.
48410 The other mask has non-negative elements wherever an element is
48411 requested from the other lane, but those elements are placed in the
48412 other lane, so that the two V2TImode halves of the vpshufb result
48413 can be swapped. */
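/* E.g. if result byte 3 (in lane 0) needs source byte 20 (in lane 1)
   of the V32QImode operand, then which = 16 and e = 4, so the
   cross-lane mask gets index 4 at position 3 ^ 16 = 19 while the
   in-lane mask gets -128 at position 3; after the vpermq lane swap the
   wanted byte lands at position 3 and the final vpor merges it with
   the in-lane bytes.  (Illustrative example only.)  */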
48414 m128 = GEN_INT (-128);
48415 for (i = 0; i < nelt; ++i)
48417 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48418 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48420 for (j = 0; j < eltsz; ++j)
48422 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48423 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48427 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48428 vperm = force_reg (V32QImode, vperm);
48430 h = gen_reg_rtx (V32QImode);
48431 op = gen_lowpart (V32QImode, d->op0);
48432 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48434 /* Swap the 128-bit lanes of h into hp. */
48435 hp = gen_reg_rtx (V4DImode);
48436 op = gen_lowpart (V4DImode, h);
48437 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48438 const1_rtx));
48440 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48441 vperm = force_reg (V32QImode, vperm);
48443 l = gen_reg_rtx (V32QImode);
48444 op = gen_lowpart (V32QImode, d->op0);
48445 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48447 op = d->target;
48448 if (d->vmode != V32QImode)
48449 op = gen_reg_rtx (V32QImode);
48450 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48451 if (op != d->target)
48452 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48454 return true;
48457 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48458 and extract-odd permutations of two V32QImode or V16HImode operands
48459 with two vpshufb insns, vpor and vpermq. We should have already
48460 failed all two or three instruction sequences. */
48462 static bool
48463 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48465 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48466 unsigned int i, nelt, eltsz;
48468 if (!TARGET_AVX2
48469 || d->one_operand_p
48470 || (d->vmode != V32QImode && d->vmode != V16HImode))
48471 return false;
48473 for (i = 0; i < d->nelt; ++i)
48474 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48475 return false;
48477 if (d->testing_p)
48478 return true;
48480 nelt = d->nelt;
48481 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48483 /* Generate two permutation masks. In the first permutation mask
48484 the first quarter will contain indexes for the first half
48485 of the op0, the second quarter will contain bit 7 set, third quarter
48486 will contain indexes for the second half of the op0 and the
48487 last quarter bit 7 set. In the second permutation mask
48488 the first quarter will contain bit 7 set, the second quarter
48489 indexes for the first half of the op1, the third quarter bit 7 set
48490 and last quarter indexes for the second half of the op1.
48491 I.e. the first mask e.g. for V32QImode extract even will be:
48492 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48493 (all values masked with 0xf except for -128) and second mask
48494 for extract even will be
48495 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48496 m128 = GEN_INT (-128);
48497 for (i = 0; i < nelt; ++i)
48499 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48500 unsigned which = d->perm[i] >= nelt;
48501 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48503 for (j = 0; j < eltsz; ++j)
48505 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48506 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48510 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48511 vperm = force_reg (V32QImode, vperm);
48513 l = gen_reg_rtx (V32QImode);
48514 op = gen_lowpart (V32QImode, d->op0);
48515 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48517 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48518 vperm = force_reg (V32QImode, vperm);
48520 h = gen_reg_rtx (V32QImode);
48521 op = gen_lowpart (V32QImode, d->op1);
48522 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48524 ior = gen_reg_rtx (V32QImode);
48525 emit_insn (gen_iorv32qi3 (ior, l, h));
48527 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48528 op = gen_reg_rtx (V4DImode);
48529 ior = gen_lowpart (V4DImode, ior);
48530 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48531 const1_rtx, GEN_INT (3)));
48532 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48534 return true;
48537 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48538 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48539 with two "and" and "pack" or two "shift" and "pack" insns. We should
48540 have already failed all two instruction sequences. */
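/* E.g. extracting the even bytes of two V16QImode vectors A and B:
   view each as V8HImode, AND every word with 0x00ff so only the even
   byte survives (zero-extended), then packuswb the two results into
   { A0, A2, ..., A14, B0, B2, ..., B14 }.  The odd extraction instead
   shifts each word right by 8 before the pack.  (Illustrative example
   only.)  */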
48542 static bool
48543 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48545 rtx op, dop0, dop1, t, rperm[16];
48546 unsigned i, odd, c, s, nelt = d->nelt;
48547 bool end_perm = false;
48548 machine_mode half_mode;
48549 rtx (*gen_and) (rtx, rtx, rtx);
48550 rtx (*gen_pack) (rtx, rtx, rtx);
48551 rtx (*gen_shift) (rtx, rtx, rtx);
48553 if (d->one_operand_p)
48554 return false;
48556 switch (d->vmode)
48558 case V8HImode:
48559 /* Required for "pack". */
48560 if (!TARGET_SSE4_1)
48561 return false;
48562 c = 0xffff;
48563 s = 16;
48564 half_mode = V4SImode;
48565 gen_and = gen_andv4si3;
48566 gen_pack = gen_sse4_1_packusdw;
48567 gen_shift = gen_lshrv4si3;
48568 break;
48569 case V16QImode:
48570 /* No check as all instructions are SSE2. */
48571 c = 0xff;
48572 s = 8;
48573 half_mode = V8HImode;
48574 gen_and = gen_andv8hi3;
48575 gen_pack = gen_sse2_packuswb;
48576 gen_shift = gen_lshrv8hi3;
48577 break;
48578 case V16HImode:
48579 if (!TARGET_AVX2)
48580 return false;
48581 c = 0xffff;
48582 s = 16;
48583 half_mode = V8SImode;
48584 gen_and = gen_andv8si3;
48585 gen_pack = gen_avx2_packusdw;
48586 gen_shift = gen_lshrv8si3;
48587 end_perm = true;
48588 break;
48589 case V32QImode:
48590 if (!TARGET_AVX2)
48591 return false;
48592 c = 0xff;
48593 s = 8;
48594 half_mode = V16HImode;
48595 gen_and = gen_andv16hi3;
48596 gen_pack = gen_avx2_packuswb;
48597 gen_shift = gen_lshrv16hi3;
48598 end_perm = true;
48599 break;
48600 default:
48601 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48602 general shuffles. */
48603 return false;
48606 /* Check that the permutation is an even or odd extraction. */
48607 odd = d->perm[0];
48608 if (odd > 1)
48609 return false;
48611 for (i = 1; i < nelt; ++i)
48612 if (d->perm[i] != 2 * i + odd)
48613 return false;
48615 if (d->testing_p)
48616 return true;
48618 dop0 = gen_reg_rtx (half_mode);
48619 dop1 = gen_reg_rtx (half_mode);
48620 if (odd == 0)
48622 for (i = 0; i < nelt / 2; i++)
48623 rperm[i] = GEN_INT (c);
48624 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48625 t = force_reg (half_mode, t);
48626 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48627 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48629 else
48631 emit_insn (gen_shift (dop0,
48632 gen_lowpart (half_mode, d->op0),
48633 GEN_INT (s)));
48634 emit_insn (gen_shift (dop1,
48635 gen_lowpart (half_mode, d->op1),
48636 GEN_INT (s)));
48638 /* For the AVX2 256-bit case we need to permute the pack result. */
48639 if (TARGET_AVX2 && end_perm)
48641 op = gen_reg_rtx (d->vmode);
48642 t = gen_reg_rtx (V4DImode);
48643 emit_insn (gen_pack (op, dop0, dop1));
48644 emit_insn (gen_avx2_permv4di_1 (t,
48645 gen_lowpart (V4DImode, op),
48646 const0_rtx,
48647 const2_rtx,
48648 const1_rtx,
48649 GEN_INT (3)));
48650 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48652 else
48653 emit_insn (gen_pack (d->target, dop0, dop1));
48655 return true;
48658 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48659 and extract-odd permutations. */
48661 static bool
48662 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48664 rtx t1, t2, t3, t4, t5;
48666 switch (d->vmode)
48668 case V4DFmode:
48669 if (d->testing_p)
48670 break;
48671 t1 = gen_reg_rtx (V4DFmode);
48672 t2 = gen_reg_rtx (V4DFmode);
48674 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48675 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48676 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48678 /* Now an unpck[lh]pd will produce the result required. */
48679 if (odd)
48680 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48681 else
48682 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48683 emit_insn (t3);
48684 break;
48686 case V8SFmode:
48688 int mask = odd ? 0xdd : 0x88;
48690 if (d->testing_p)
48691 break;
48692 t1 = gen_reg_rtx (V8SFmode);
48693 t2 = gen_reg_rtx (V8SFmode);
48694 t3 = gen_reg_rtx (V8SFmode);
48696 /* Shuffle within the 128-bit lanes to produce:
48697 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48698 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48699 GEN_INT (mask)));
48701 /* Shuffle the lanes around to produce:
48702 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48703 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48704 GEN_INT (0x3)));
48706 /* Shuffle within the 128-bit lanes to produce:
48707 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48708 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48710 /* Shuffle within the 128-bit lanes to produce:
48711 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48712 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48714 /* Shuffle the lanes around to produce:
48715 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48716 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48717 GEN_INT (0x20)));
48719 break;
48721 case V2DFmode:
48722 case V4SFmode:
48723 case V2DImode:
48724 case V4SImode:
48725 /* These are always directly implementable by expand_vec_perm_1. */
48726 gcc_unreachable ();
48728 case V8HImode:
48729 if (TARGET_SSE4_1)
48730 return expand_vec_perm_even_odd_pack (d);
48731 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48732 return expand_vec_perm_pshufb2 (d);
48733 else
48735 if (d->testing_p)
48736 break;
48737 /* We need 2*log2(N)-1 operations to achieve odd/even
48738 with interleave. */
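/* E.g. for A = { a0 ... a7 } and B = { b0 ... b7 } the five interleaves
   below produce successively
     { a0 b0 a1 b1 a2 b2 a3 b3 } and { a4 b4 a5 b5 a6 b6 a7 b7 },
     { a0 a4 b0 b4 a1 a5 b1 b5 } and { a2 a6 b2 b6 a3 a7 b3 b7 },
   and finally { a0 a2 a4 a6 b0 b2 b4 b6 } (even) or
   { a1 a3 a5 a7 b1 b3 b5 b7 } (odd).  (Illustrative trace only.)  */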
48739 t1 = gen_reg_rtx (V8HImode);
48740 t2 = gen_reg_rtx (V8HImode);
48741 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48742 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48743 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48744 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48745 if (odd)
48746 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48747 else
48748 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48749 emit_insn (t3);
48751 break;
48753 case V16QImode:
48754 return expand_vec_perm_even_odd_pack (d);
48756 case V16HImode:
48757 case V32QImode:
48758 return expand_vec_perm_even_odd_pack (d);
48760 case V4DImode:
48761 if (!TARGET_AVX2)
48763 struct expand_vec_perm_d d_copy = *d;
48764 d_copy.vmode = V4DFmode;
48765 if (d->testing_p)
48766 d_copy.target = gen_lowpart (V4DFmode, d->target);
48767 else
48768 d_copy.target = gen_reg_rtx (V4DFmode);
48769 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48770 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48771 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48773 if (!d->testing_p)
48774 emit_move_insn (d->target,
48775 gen_lowpart (V4DImode, d_copy.target));
48776 return true;
48778 return false;
48781 if (d->testing_p)
48782 break;
48784 t1 = gen_reg_rtx (V4DImode);
48785 t2 = gen_reg_rtx (V4DImode);
48787 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48788 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48789 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48791 /* Now a vpunpck[lh]qdq will produce the result required. */
48792 if (odd)
48793 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48794 else
48795 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48796 emit_insn (t3);
48797 break;
48799 case V8SImode:
48800 if (!TARGET_AVX2)
48802 struct expand_vec_perm_d d_copy = *d;
48803 d_copy.vmode = V8SFmode;
48804 if (d->testing_p)
48805 d_copy.target = gen_lowpart (V8SFmode, d->target);
48806 else
48807 d_copy.target = gen_reg_rtx (V8SFmode);
48808 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48809 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48810 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48812 if (!d->testing_p)
48813 emit_move_insn (d->target,
48814 gen_lowpart (V8SImode, d_copy.target));
48815 return true;
48817 return false;
48820 if (d->testing_p)
48821 break;
48823 t1 = gen_reg_rtx (V8SImode);
48824 t2 = gen_reg_rtx (V8SImode);
48825 t3 = gen_reg_rtx (V4DImode);
48826 t4 = gen_reg_rtx (V4DImode);
48827 t5 = gen_reg_rtx (V4DImode);
48829 /* Shuffle the lanes around into
48830 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48831 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48832 gen_lowpart (V4DImode, d->op1),
48833 GEN_INT (0x20)));
48834 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48835 gen_lowpart (V4DImode, d->op1),
48836 GEN_INT (0x31)));
48838 /* Swap the 2nd and 3rd position in each lane into
48839 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48840 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48841 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48842 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48843 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48845 /* Now a vpunpck[lh]qdq will produce
48846 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48847 if (odd)
48848 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48849 gen_lowpart (V4DImode, t2));
48850 else
48851 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48852 gen_lowpart (V4DImode, t2));
48853 emit_insn (t3);
48854 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48855 break;
48857 default:
48858 gcc_unreachable ();
48861 return true;
48864 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48865 extract-even and extract-odd permutations. */
48867 static bool
48868 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48870 unsigned i, odd, nelt = d->nelt;
48872 odd = d->perm[0];
48873 if (odd != 0 && odd != 1)
48874 return false;
48876 for (i = 1; i < nelt; ++i)
48877 if (d->perm[i] != 2 * i + odd)
48878 return false;
48880 return expand_vec_perm_even_odd_1 (d, odd);
48883 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48884 permutations. We assume that expand_vec_perm_1 has already failed. */
48886 static bool
48887 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48889 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48890 machine_mode vmode = d->vmode;
48891 unsigned char perm2[4];
48892 rtx op0 = d->op0, dest;
48893 bool ok;
48895 switch (vmode)
48897 case V4DFmode:
48898 case V8SFmode:
48899 /* These are special-cased in sse.md so that we can optionally
48900 use the vbroadcast instruction. They expand to two insns
48901 if the input happens to be in a register. */
48902 gcc_unreachable ();
48904 case V2DFmode:
48905 case V2DImode:
48906 case V4SFmode:
48907 case V4SImode:
48908 /* These are always implementable using standard shuffle patterns. */
48909 gcc_unreachable ();
48911 case V8HImode:
48912 case V16QImode:
48913 /* These can be implemented via interleave. We save one insn by
48914 stopping once we have promoted to V4SImode and then use pshufd. */
48915 if (d->testing_p)
48916 return true;
48919 rtx dest;
48920 rtx (*gen) (rtx, rtx, rtx)
48921 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
48922 : gen_vec_interleave_lowv8hi;
48924 if (elt >= nelt2)
48926 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
48927 : gen_vec_interleave_highv8hi;
48928 elt -= nelt2;
48930 nelt2 /= 2;
48932 dest = gen_reg_rtx (vmode);
48933 emit_insn (gen (dest, op0, op0));
48934 vmode = get_mode_wider_vector (vmode);
48935 op0 = gen_lowpart (vmode, dest);
48937 while (vmode != V4SImode);
48939 memset (perm2, elt, 4);
48940 dest = gen_reg_rtx (V4SImode);
48941 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
48942 gcc_assert (ok);
48943 if (!d->testing_p)
48944 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
48945 return true;
48947 case V64QImode:
48948 case V32QImode:
48949 case V16HImode:
48950 case V8SImode:
48951 case V4DImode:
48952 /* For AVX2 broadcasts of the first element vpbroadcast* or
48953 vpermq should be used by expand_vec_perm_1. */
48954 gcc_assert (!TARGET_AVX2 || d->perm[0]);
48955 return false;
48957 default:
48958 gcc_unreachable ();
48962 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48963 broadcast permutations. */
48965 static bool
48966 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
48968 unsigned i, elt, nelt = d->nelt;
48970 if (!d->one_operand_p)
48971 return false;
48973 elt = d->perm[0];
48974 for (i = 1; i < nelt; ++i)
48975 if (d->perm[i] != elt)
48976 return false;
48978 return expand_vec_perm_broadcast_1 (d);
48981 /* Implement arbitrary permutations of two V64QImode operands
48982 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
48983 static bool
48984 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
48986 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
48987 return false;
48989 if (d->testing_p)
48990 return true;
48992 struct expand_vec_perm_d ds[2];
48993 rtx rperm[128], vperm, target0, target1;
48994 unsigned int i, nelt;
48995 machine_mode vmode;
48997 nelt = d->nelt;
48998 vmode = V64QImode;
49000 for (i = 0; i < 2; i++)
49002 ds[i] = *d;
49003 ds[i].vmode = V32HImode;
49004 ds[i].nelt = 32;
49005 ds[i].target = gen_reg_rtx (V32HImode);
49006 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49007 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49010 /* Prepare permutations such that the first one takes care of
49011 putting the even bytes into the right positions or one position
49012 higher (ds[0]) and the second one takes care of
49013 putting the odd bytes into the right positions or one position lower
49014 (ds[1]). */
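/* E.g. if d->perm[0] = 5 and d->perm[1] = 8, then ds[0].perm[0] = 2
   and ds[1].perm[0] = 4, i.e. the word permutations below fetch the
   words holding bytes 5 and 8 into word 0 of their results; the
   vpshufb masks built below then pick byte 1 (resp. byte 0) out of
   that word into result byte 0 (resp. byte 1) and zero everything
   else, and the final vpor merges the even and odd result bytes.
   (Illustrative example only.)  */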
49016 for (i = 0; i < nelt; i++)
49018 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49019 if (i & 1)
49021 rperm[i] = constm1_rtx;
49022 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49024 else
49026 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49027 rperm[i + 64] = constm1_rtx;
49031 bool ok = expand_vec_perm_1 (&ds[0]);
49032 gcc_assert (ok);
49033 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49035 ok = expand_vec_perm_1 (&ds[1]);
49036 gcc_assert (ok);
49037 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49039 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49040 vperm = force_reg (vmode, vperm);
49041 target0 = gen_reg_rtx (V64QImode);
49042 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49044 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49045 vperm = force_reg (vmode, vperm);
49046 target1 = gen_reg_rtx (V64QImode);
49047 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49049 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49050 return true;
49053 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49054 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49055 all the shorter instruction sequences. */
49057 static bool
49058 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49060 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49061 unsigned int i, nelt, eltsz;
49062 bool used[4];
49064 if (!TARGET_AVX2
49065 || d->one_operand_p
49066 || (d->vmode != V32QImode && d->vmode != V16HImode))
49067 return false;
49069 if (d->testing_p)
49070 return true;
49072 nelt = d->nelt;
49073 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49075 /* Generate 4 permutation masks. If the required element is within
49076 the same lane, it is shuffled in. If the required element is from the
49077 other lane, force a zero by setting bit 7 in the permutation mask.
49078 The other mask has non-negative elements wherever an element is
49079 requested from the other lane, but those elements are placed in the
49080 other lane, so that the two V2TImode halves of the vpshufb result
49081 can be swapped. */
49082 m128 = GEN_INT (-128);
49083 for (i = 0; i < 32; ++i)
49085 rperm[0][i] = m128;
49086 rperm[1][i] = m128;
49087 rperm[2][i] = m128;
49088 rperm[3][i] = m128;
49090 used[0] = false;
49091 used[1] = false;
49092 used[2] = false;
49093 used[3] = false;
49094 for (i = 0; i < nelt; ++i)
49096 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49097 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49098 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49100 for (j = 0; j < eltsz; ++j)
49101 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49102 used[which] = true;
49105 for (i = 0; i < 2; ++i)
49107 if (!used[2 * i + 1])
49109 h[i] = NULL_RTX;
49110 continue;
49112 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49113 gen_rtvec_v (32, rperm[2 * i + 1]));
49114 vperm = force_reg (V32QImode, vperm);
49115 h[i] = gen_reg_rtx (V32QImode);
49116 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49117 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49120 /* Swap the 128-bit lanes of h[X]. */
49121 for (i = 0; i < 2; ++i)
49123 if (h[i] == NULL_RTX)
49124 continue;
49125 op = gen_reg_rtx (V4DImode);
49126 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49127 const2_rtx, GEN_INT (3), const0_rtx,
49128 const1_rtx));
49129 h[i] = gen_lowpart (V32QImode, op);
49132 for (i = 0; i < 2; ++i)
49134 if (!used[2 * i])
49136 l[i] = NULL_RTX;
49137 continue;
49139 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49140 vperm = force_reg (V32QImode, vperm);
49141 l[i] = gen_reg_rtx (V32QImode);
49142 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49143 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49146 for (i = 0; i < 2; ++i)
49148 if (h[i] && l[i])
49150 op = gen_reg_rtx (V32QImode);
49151 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49152 l[i] = op;
49154 else if (h[i])
49155 l[i] = h[i];
49158 gcc_assert (l[0] && l[1]);
49159 op = d->target;
49160 if (d->vmode != V32QImode)
49161 op = gen_reg_rtx (V32QImode);
49162 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49163 if (op != d->target)
49164 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49165 return true;
49168 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49169 With all of the interface bits taken care of, perform the expansion
49170 in D and return true on success. */
49172 static bool
49173 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49175 /* Try a single instruction expansion. */
49176 if (expand_vec_perm_1 (d))
49177 return true;
49179 /* Try sequences of two instructions. */
49181 if (expand_vec_perm_pshuflw_pshufhw (d))
49182 return true;
49184 if (expand_vec_perm_palignr (d, false))
49185 return true;
49187 if (expand_vec_perm_interleave2 (d))
49188 return true;
49190 if (expand_vec_perm_broadcast (d))
49191 return true;
49193 if (expand_vec_perm_vpermq_perm_1 (d))
49194 return true;
49196 if (expand_vec_perm_vperm2f128 (d))
49197 return true;
49199 if (expand_vec_perm_pblendv (d))
49200 return true;
49202 /* Try sequences of three instructions. */
49204 if (expand_vec_perm_even_odd_pack (d))
49205 return true;
49207 if (expand_vec_perm_2vperm2f128_vshuf (d))
49208 return true;
49210 if (expand_vec_perm_pshufb2 (d))
49211 return true;
49213 if (expand_vec_perm_interleave3 (d))
49214 return true;
49216 if (expand_vec_perm_vperm2f128_vblend (d))
49217 return true;
49219 /* Try sequences of four instructions. */
49221 if (expand_vec_perm_vpshufb2_vpermq (d))
49222 return true;
49224 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49225 return true;
49227 if (expand_vec_perm_vpermi2_vpshub2 (d))
49228 return true;
49230 /* ??? Look for narrow permutations whose element orderings would
49231 allow the promotion to a wider mode. */
49233 /* ??? Look for sequences of interleave or a wider permute that place
49234 the data into the correct lanes for a half-vector shuffle like
49235 pshuf[lh]w or vpermilps. */
49237 /* ??? Look for sequences of interleave that produce the desired results.
49238 The combinatorics of punpck[lh] get pretty ugly... */
49240 if (expand_vec_perm_even_odd (d))
49241 return true;
49243 /* Even longer sequences. */
49244 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49245 return true;
49247 return false;
49250 /* If a permutation only uses one operand, make it clear. Returns true
49251 if the permutation references both operands. */
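/* E.g. with nelt = 4, a selector { 4, 5, 6, 7 } on distinct operands
   only references the second operand, so it is folded to { 0, 1, 2, 3 }
   with op0 replaced by op1 and one_operand_p set; { 0, 5, 1, 4 } keeps
   both operands and returns true.  (Illustrative example only.)  */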
49253 static bool
49254 canonicalize_perm (struct expand_vec_perm_d *d)
49256 int i, which, nelt = d->nelt;
49258 for (i = which = 0; i < nelt; ++i)
49259 which |= (d->perm[i] < nelt ? 1 : 2);
49261 d->one_operand_p = true;
49262 switch (which)
49264 default:
49265 gcc_unreachable();
49267 case 3:
49268 if (!rtx_equal_p (d->op0, d->op1))
49270 d->one_operand_p = false;
49271 break;
49273 /* The elements of PERM do not suggest that only the first operand
49274 is used, but both operands are identical. Allow easier matching
49275 of the permutation by folding the permutation into the single
49276 input vector. */
49277 /* FALLTHRU */
49279 case 2:
49280 for (i = 0; i < nelt; ++i)
49281 d->perm[i] &= nelt - 1;
49282 d->op0 = d->op1;
49283 break;
49285 case 1:
49286 d->op1 = d->op0;
49287 break;
49290 return (which == 3);
49293 bool
49294 ix86_expand_vec_perm_const (rtx operands[4])
49296 struct expand_vec_perm_d d;
49297 unsigned char perm[MAX_VECT_LEN];
49298 int i, nelt;
49299 bool two_args;
49300 rtx sel;
49302 d.target = operands[0];
49303 d.op0 = operands[1];
49304 d.op1 = operands[2];
49305 sel = operands[3];
49307 d.vmode = GET_MODE (d.target);
49308 gcc_assert (VECTOR_MODE_P (d.vmode));
49309 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49310 d.testing_p = false;
49312 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49313 gcc_assert (XVECLEN (sel, 0) == nelt);
49314 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49316 for (i = 0; i < nelt; ++i)
49318 rtx e = XVECEXP (sel, 0, i);
49319 int ei = INTVAL (e) & (2 * nelt - 1);
49320 d.perm[i] = ei;
49321 perm[i] = ei;
49324 two_args = canonicalize_perm (&d);
49326 if (ix86_expand_vec_perm_const_1 (&d))
49327 return true;
49329 /* If the selector says both arguments are needed, but the operands are the
49330 same, the above tried to expand with one_operand_p and flattened selector.
49331 If that didn't work, retry without one_operand_p; we succeeded with that
49332 during testing. */
49333 if (two_args && d.one_operand_p)
49335 d.one_operand_p = false;
49336 memcpy (d.perm, perm, sizeof (perm));
49337 return ix86_expand_vec_perm_const_1 (&d);
49340 return false;
49343 /* Implement targetm.vectorize.vec_perm_const_ok. */
49345 static bool
49346 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49347 const unsigned char *sel)
49349 struct expand_vec_perm_d d;
49350 unsigned int i, nelt, which;
49351 bool ret;
49353 d.vmode = vmode;
49354 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49355 d.testing_p = true;
49357 /* Given sufficient ISA support we can just return true here
49358 for selected vector modes. */
49359 switch (d.vmode)
49361 case V16SFmode:
49362 case V16SImode:
49363 case V8DImode:
49364 case V8DFmode:
49365 if (TARGET_AVX512F)
49366 /* All implementable with a single vpermi2 insn. */
49367 return true;
49368 break;
49369 case V32HImode:
49370 if (TARGET_AVX512BW)
49371 /* All implementable with a single vpermi2 insn. */
49372 return true;
49373 break;
49374 case V64QImode:
49375 if (TARGET_AVX512BW)
49376 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49377 return true;
49378 break;
49379 case V8SImode:
49380 case V8SFmode:
49381 case V4DFmode:
49382 case V4DImode:
49383 if (TARGET_AVX512VL)
49384 /* All implementable with a single vpermi2 insn. */
49385 return true;
49386 break;
49387 case V16HImode:
49388 if (TARGET_AVX2)
49389 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49390 return true;
49391 break;
49392 case V32QImode:
49393 if (TARGET_AVX2)
49394 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49395 return true;
49396 break;
49397 case V4SImode:
49398 case V4SFmode:
49399 case V8HImode:
49400 case V16QImode:
49401 /* All implementable with a single vpperm insn. */
49402 if (TARGET_XOP)
49403 return true;
49404 /* All implementable with 2 pshufb + 1 ior. */
49405 if (TARGET_SSSE3)
49406 return true;
49407 break;
49408 case V2DImode:
49409 case V2DFmode:
49410 /* All implementable with shufpd or unpck[lh]pd. */
49411 return true;
49412 default:
49413 return false;
49416 /* Extract the values from the vector CST into the permutation
49417 array in D. */
49418 memcpy (d.perm, sel, nelt);
49419 for (i = which = 0; i < nelt; ++i)
49421 unsigned char e = d.perm[i];
49422 gcc_assert (e < 2 * nelt);
49423 which |= (e < nelt ? 1 : 2);
49426 /* For all elements from second vector, fold the elements to first. */
49427 if (which == 2)
49428 for (i = 0; i < nelt; ++i)
49429 d.perm[i] -= nelt;
49431 /* Check whether the mask can be applied to the vector type. */
49432 d.one_operand_p = (which != 3);
49434 /* Implementable with shufps or pshufd. */
49435 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49436 return true;
49438 /* Otherwise we have to go through the motions and see if we can
49439 figure out how to generate the requested permutation. */
49440 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49441 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49442 if (!d.one_operand_p)
49443 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49445 start_sequence ();
49446 ret = ix86_expand_vec_perm_const_1 (&d);
49447 end_sequence ();
49449 return ret;
49452 void
49453 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49455 struct expand_vec_perm_d d;
49456 unsigned i, nelt;
49458 d.target = targ;
49459 d.op0 = op0;
49460 d.op1 = op1;
49461 d.vmode = GET_MODE (targ);
49462 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49463 d.one_operand_p = false;
49464 d.testing_p = false;
49466 for (i = 0; i < nelt; ++i)
49467 d.perm[i] = i * 2 + odd;
49469 /* We'll either be able to implement the permutation directly... */
49470 if (expand_vec_perm_1 (&d))
49471 return;
49473 /* ... or we use the special-case patterns. */
49474 expand_vec_perm_even_odd_1 (&d, odd);
49477 static void
49478 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49480 struct expand_vec_perm_d d;
49481 unsigned i, nelt, base;
49482 bool ok;
49484 d.target = targ;
49485 d.op0 = op0;
49486 d.op1 = op1;
49487 d.vmode = GET_MODE (targ);
49488 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49489 d.one_operand_p = false;
49490 d.testing_p = false;
49492 base = high_p ? nelt / 2 : 0;
49493 for (i = 0; i < nelt / 2; ++i)
49495 d.perm[i * 2] = i + base;
49496 d.perm[i * 2 + 1] = i + base + nelt;
49499 /* Note that for AVX this isn't one instruction. */
49500 ok = ix86_expand_vec_perm_const_1 (&d);
49501 gcc_assert (ok);
49505 /* Expand a vector operation CODE for a V*QImode in terms of the
49506 same operation on V*HImode. */
49508 void
49509 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49511 machine_mode qimode = GET_MODE (dest);
49512 machine_mode himode;
49513 rtx (*gen_il) (rtx, rtx, rtx);
49514 rtx (*gen_ih) (rtx, rtx, rtx);
49515 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49516 struct expand_vec_perm_d d;
49517 bool ok, full_interleave;
49518 bool uns_p = false;
49519 int i;
49521 switch (qimode)
49523 case V16QImode:
49524 himode = V8HImode;
49525 gen_il = gen_vec_interleave_lowv16qi;
49526 gen_ih = gen_vec_interleave_highv16qi;
49527 break;
49528 case V32QImode:
49529 himode = V16HImode;
49530 gen_il = gen_avx2_interleave_lowv32qi;
49531 gen_ih = gen_avx2_interleave_highv32qi;
49532 break;
49533 case V64QImode:
49534 himode = V32HImode;
49535 gen_il = gen_avx512bw_interleave_lowv64qi;
49536 gen_ih = gen_avx512bw_interleave_highv64qi;
49537 break;
49538 default:
49539 gcc_unreachable ();
49542 op2_l = op2_h = op2;
49543 switch (code)
49545 case MULT:
49546 /* Unpack data such that we've got a source byte in each low byte of
49547 each word. We don't care what goes into the high byte of each word.
49548 Rather than trying to get zero in there, most convenient is to let
49549 it be a copy of the low byte. */
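/* E.g. interleaving a V16QImode operand with itself turns byte a_i
   into the word a_i | (a_i << 8); a word multiply of two such words
   leaves (a_i * b_i) & 0xff in the low byte, which is all the final
   byte permutation keeps.  (Illustrative note.)  */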
49550 op2_l = gen_reg_rtx (qimode);
49551 op2_h = gen_reg_rtx (qimode);
49552 emit_insn (gen_il (op2_l, op2, op2));
49553 emit_insn (gen_ih (op2_h, op2, op2));
49554 /* FALLTHRU */
49556 op1_l = gen_reg_rtx (qimode);
49557 op1_h = gen_reg_rtx (qimode);
49558 emit_insn (gen_il (op1_l, op1, op1));
49559 emit_insn (gen_ih (op1_h, op1, op1));
49560 full_interleave = qimode == V16QImode;
49561 break;
49563 case ASHIFT:
49564 case LSHIFTRT:
49565 uns_p = true;
49566 /* FALLTHRU */
49567 case ASHIFTRT:
49568 op1_l = gen_reg_rtx (himode);
49569 op1_h = gen_reg_rtx (himode);
49570 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49571 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49572 full_interleave = true;
49573 break;
49574 default:
49575 gcc_unreachable ();
49578 /* Perform the operation. */
49579 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49580 1, OPTAB_DIRECT);
49581 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49582 1, OPTAB_DIRECT);
49583 gcc_assert (res_l && res_h);
49585 /* Merge the data back into the right place. */
49586 d.target = dest;
49587 d.op0 = gen_lowpart (qimode, res_l);
49588 d.op1 = gen_lowpart (qimode, res_h);
49589 d.vmode = qimode;
49590 d.nelt = GET_MODE_NUNITS (qimode);
49591 d.one_operand_p = false;
49592 d.testing_p = false;
49594 if (full_interleave)
49596 /* For SSE2, we used a full interleave, so the desired
49597 results are in the even elements. */
49598 for (i = 0; i < 64; ++i)
49599 d.perm[i] = i * 2;
49601 else
49603 /* For AVX, the interleave used above was not cross-lane. So the
49604 extraction is evens but with the second and third quarter swapped.
49605 Happily, that is even one insn shorter than even extraction. */
49606 for (i = 0; i < 64; ++i)
49607 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
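/* The selector built above is e.g. { 0, 2, ..., 14, 32, 34, ..., 46,
   16, 18, ..., 30, 48, 50, ..., 62 } for V32QImode: the even bytes of
   each 128-bit lane of res_l and res_h, with the middle two quarters
   swapped relative to a plain even extraction.  (Illustrative
   example only.)  */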
49610 ok = ix86_expand_vec_perm_const_1 (&d);
49611 gcc_assert (ok);
49613 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49614 gen_rtx_fmt_ee (code, qimode, op1, op2));
49617 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49618 if op is CONST_VECTOR with all odd elements equal to their
49619 preceding element. */
49621 static bool
49622 const_vector_equal_evenodd_p (rtx op)
49624 machine_mode mode = GET_MODE (op);
49625 int i, nunits = GET_MODE_NUNITS (mode);
49626 if (GET_CODE (op) != CONST_VECTOR
49627 || nunits != CONST_VECTOR_NUNITS (op))
49628 return false;
49629 for (i = 0; i < nunits; i += 2)
49630 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49631 return false;
49632 return true;
49635 void
49636 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49637 bool uns_p, bool odd_p)
49639 machine_mode mode = GET_MODE (op1);
49640 machine_mode wmode = GET_MODE (dest);
49641 rtx x;
49642 rtx orig_op1 = op1, orig_op2 = op2;
49644 if (!nonimmediate_operand (op1, mode))
49645 op1 = force_reg (mode, op1);
49646 if (!nonimmediate_operand (op2, mode))
49647 op2 = force_reg (mode, op2);
49649 /* We only play even/odd games with vectors of SImode. */
49650 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49652 /* If we're looking for the odd results, shift those members down to
49653 the even slots. For some cpus this is faster than a PSHUFD. */
49654 if (odd_p)
49656 /* For XOP use vpmacsdqh, but only for smult, as it is only
49657 signed. */
49658 if (TARGET_XOP && mode == V4SImode && !uns_p)
49660 x = force_reg (wmode, CONST0_RTX (wmode));
49661 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49662 return;
49665 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49666 if (!const_vector_equal_evenodd_p (orig_op1))
49667 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49668 x, NULL, 1, OPTAB_DIRECT);
49669 if (!const_vector_equal_evenodd_p (orig_op2))
49670 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49671 x, NULL, 1, OPTAB_DIRECT);
49672 op1 = gen_lowpart (mode, op1);
49673 op2 = gen_lowpart (mode, op2);
49676 if (mode == V16SImode)
49678 if (uns_p)
49679 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49680 else
49681 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49683 else if (mode == V8SImode)
49685 if (uns_p)
49686 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49687 else
49688 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49690 else if (uns_p)
49691 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49692 else if (TARGET_SSE4_1)
49693 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49694 else
49696 rtx s1, s2, t0, t1, t2;
49698 /* The easiest way to implement this without PMULDQ is to go through
49699 the motions as if we are performing a full 64-bit multiply, except
49700 that we need to do less shuffling of the elements. */
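/* Sketch of the identity used (per element, all arithmetic mod 2^64):
     signed a * b  =  (unsigned) a * b
                      - ((a < 0 ? (unsigned) b : 0) << 32)
                      - ((b < 0 ? (unsigned) a : 0) << 32).
   The comparisons below yield all-ones masks where an operand is
   negative; the unsigned even multiply of such a mask (2^32 - 1) by x
   gives 2^32 * x - x, which after the final shift by 32 is congruent
   to -(x << 32), i.e. exactly the correction term needed.  */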
49702 /* Compute the sign-extension, aka highparts, of the two operands. */
49703 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49704 op1, pc_rtx, pc_rtx);
49705 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49706 op2, pc_rtx, pc_rtx);
49708 /* Multiply LO(A) * HI(B), and vice-versa. */
49709 t1 = gen_reg_rtx (wmode);
49710 t2 = gen_reg_rtx (wmode);
49711 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49712 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49714 /* Multiply LO(A) * LO(B). */
49715 t0 = gen_reg_rtx (wmode);
49716 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49718 /* Combine and shift the highparts into place. */
49719 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49720 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49721 1, OPTAB_DIRECT);
49723 /* Combine high and low parts. */
49724 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49725 return;
49727 emit_insn (x);
49730 void
49731 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49732 bool uns_p, bool high_p)
49734 machine_mode wmode = GET_MODE (dest);
49735 machine_mode mode = GET_MODE (op1);
49736 rtx t1, t2, t3, t4, mask;
49738 switch (mode)
49740 case V4SImode:
49741 t1 = gen_reg_rtx (mode);
49742 t2 = gen_reg_rtx (mode);
49743 if (TARGET_XOP && !uns_p)
49745 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49746 shuffle the elements once so that all elements are in the right
49747 place for immediate use: { A C B D }. */
49748 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49749 const1_rtx, GEN_INT (3)));
49750 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49751 const1_rtx, GEN_INT (3)));
49753 else
49755 /* Put the elements into place for the multiply. */
49756 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49757 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49758 high_p = false;
49760 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49761 break;
49763 case V8SImode:
49764 /* Shuffle the elements between the lanes. After this we
49765 have { A B E F | C D G H } for each operand. */
49766 t1 = gen_reg_rtx (V4DImode);
49767 t2 = gen_reg_rtx (V4DImode);
49768 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49769 const0_rtx, const2_rtx,
49770 const1_rtx, GEN_INT (3)));
49771 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49772 const0_rtx, const2_rtx,
49773 const1_rtx, GEN_INT (3)));
49775 /* Shuffle the elements within the lanes. After this we
49776 have { A A B B | C C D D } or { E E F F | G G H H }. */
49777 t3 = gen_reg_rtx (V8SImode);
49778 t4 = gen_reg_rtx (V8SImode);
49779 mask = GEN_INT (high_p
49780 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49781 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49782 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49783 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49785 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49786 break;
49788 case V8HImode:
49789 case V16HImode:
49790 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49791 uns_p, OPTAB_DIRECT);
49792 t2 = expand_binop (mode,
49793 uns_p ? umul_highpart_optab : smul_highpart_optab,
49794 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49795 gcc_assert (t1 && t2);
49797 t3 = gen_reg_rtx (mode);
49798 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49799 emit_move_insn (dest, gen_lowpart (wmode, t3));
49800 break;
49802 case V16QImode:
49803 case V32QImode:
49804 case V32HImode:
49805 case V16SImode:
49806 case V64QImode:
49807 t1 = gen_reg_rtx (wmode);
49808 t2 = gen_reg_rtx (wmode);
49809 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49810 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49812 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
49813 break;
49815 default:
49816 gcc_unreachable ();
49820 void
49821 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49823 rtx res_1, res_2, res_3, res_4;
49825 res_1 = gen_reg_rtx (V4SImode);
49826 res_2 = gen_reg_rtx (V4SImode);
49827 res_3 = gen_reg_rtx (V2DImode);
49828 res_4 = gen_reg_rtx (V2DImode);
49829 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49830 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49832 /* Move the results in element 2 down to element 1; we don't care
49833 what goes in elements 2 and 3. Then we can merge the parts
49834 back together with an interleave.
49836 Note that two other sequences were tried:
49837 (1) Use interleaves at the start instead of psrldq, which allows
49838 us to use a single shufps to merge things back at the end.
49839 (2) Use shufps here to combine the two vectors, then pshufd to
49840 put the elements in the correct order.
49841 In both cases the cost of the reformatting stall was too high
49842 and the overall sequence slower. */
49844 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49845 const0_rtx, const2_rtx,
49846 const0_rtx, const0_rtx));
49847 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49848 const0_rtx, const2_rtx,
49849 const0_rtx, const0_rtx));
49850 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
49852 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
49855 void
49856 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49858 machine_mode mode = GET_MODE (op0);
49859 rtx t1, t2, t3, t4, t5, t6;
49861 if (TARGET_AVX512DQ && mode == V8DImode)
49862 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49863 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49864 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49865 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49866 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
49867 else if (TARGET_XOP && mode == V2DImode)
49869 /* op1: A,B,C,D, op2: E,F,G,H */
49870 op1 = gen_lowpart (V4SImode, op1);
49871 op2 = gen_lowpart (V4SImode, op2);
49873 t1 = gen_reg_rtx (V4SImode);
49874 t2 = gen_reg_rtx (V4SImode);
49875 t3 = gen_reg_rtx (V2DImode);
49876 t4 = gen_reg_rtx (V2DImode);
49878 /* t1: B,A,D,C */
49879 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49880 GEN_INT (1),
49881 GEN_INT (0),
49882 GEN_INT (3),
49883 GEN_INT (2)));
49885 /* t2: (B*E),(A*F),(D*G),(C*H) */
49886 emit_insn (gen_mulv4si3 (t2, t1, op2));
49888 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49889 emit_insn (gen_xop_phadddq (t3, t2));
49891 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49892 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
49894 /* Multiply lower parts and add all */
49895 t5 = gen_reg_rtx (V2DImode);
49896 emit_insn (gen_vec_widen_umult_even_v4si (t5,
49897 gen_lowpart (V4SImode, op1),
49898 gen_lowpart (V4SImode, op2)));
49899 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
49902 else
49904 machine_mode nmode;
49905 rtx (*umul) (rtx, rtx, rtx);
49907 if (mode == V2DImode)
49909 umul = gen_vec_widen_umult_even_v4si;
49910 nmode = V4SImode;
49912 else if (mode == V4DImode)
49914 umul = gen_vec_widen_umult_even_v8si;
49915 nmode = V8SImode;
49917 else if (mode == V8DImode)
49919 umul = gen_vec_widen_umult_even_v16si;
49920 nmode = V16SImode;
49922 else
49923 gcc_unreachable ();
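/* The generic sequence below relies on, per 64-bit element,
     (hi1 * 2^32 + lo1) * (hi2 * 2^32 + lo2)
       == lo1 * lo2 + ((hi1 * lo2 + hi2 * lo1) << 32)   (mod 2^64),
   where the widening even multiply computes the unsigned 32 x 32 -> 64
   products of the low halves sitting in the even 32-bit positions.  */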
49926 /* Multiply low parts. */
49927 t1 = gen_reg_rtx (mode);
49928 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
49930 /* Shift input vectors right 32 bits so we can multiply high parts. */
49931 t6 = GEN_INT (32);
49932 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
49933 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
49935 /* Multiply high parts by low parts. */
49936 t4 = gen_reg_rtx (mode);
49937 t5 = gen_reg_rtx (mode);
49938 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
49939 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
49941 /* Combine and shift the highparts back. */
49942 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
49943 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
49945 /* Combine high and low parts. */
49946 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
49949 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49950 gen_rtx_MULT (mode, op1, op2));
49953 /* Return true if control transfer instruction INSN
49954 should be encoded with the bnd prefix.
49955 If INSN is NULL then return true when control
49956 transfer instructions should be prefixed with
49957 bnd by default for the current function. */
49959 bool
49960 ix86_bnd_prefixed_insn_p (rtx insn)
49962 /* For call insns check special flag. */
49963 if (insn && CALL_P (insn))
49965 rtx call = get_call_rtx_from (insn);
49966 if (call)
49967 return CALL_EXPR_WITH_BOUNDS_P (call);
49970 /* All other insns are prefixed only if function is instrumented. */
49971 return chkp_function_instrumented_p (current_function_decl);
49974 /* Calculate integer abs() using only SSE2 instructions. */
49976 void
49977 ix86_expand_sse2_abs (rtx target, rtx input)
49979 machine_mode mode = GET_MODE (target);
49980 rtx tmp0, tmp1, x;
49982 switch (mode)
49984 /* For 32-bit signed integer X, the best way to calculate the absolute
49985 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
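/* E.g. for X = -5: X >> 31 = -1, (-1 ^ -5) = 4, and 4 - (-1) = 5;
   for non-negative X the shift gives 0 and X is returned unchanged.
   (Illustrative example only.)  */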
49986 case V4SImode:
49987 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
49988 GEN_INT (GET_MODE_BITSIZE
49989 (GET_MODE_INNER (mode)) - 1),
49990 NULL, 0, OPTAB_DIRECT);
49991 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
49992 NULL, 0, OPTAB_DIRECT);
49993 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
49994 target, 0, OPTAB_DIRECT);
49995 break;
49997 /* For 16-bit signed integer X, the best way to calculate the absolute
49998 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
49999 case V8HImode:
50000 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50002 x = expand_simple_binop (mode, SMAX, tmp0, input,
50003 target, 0, OPTAB_DIRECT);
50004 break;
50006 /* For 8-bit signed integer X, the best way to calculate the absolute
50007 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50008 as SSE2 provides the PMINUB insn. */
50009 case V16QImode:
50010 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50012 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50013 target, 0, OPTAB_DIRECT);
50014 break;
50016 default:
50017 gcc_unreachable ();
50020 if (x != target)
50021 emit_move_insn (target, x);
50024 /* Expand an insert into a vector register through pinsr insn.
50025 Return true if successful. */
50027 bool
50028 ix86_expand_pinsr (rtx *operands)
50030 rtx dst = operands[0];
50031 rtx src = operands[3];
50033 unsigned int size = INTVAL (operands[1]);
50034 unsigned int pos = INTVAL (operands[2]);
50036 if (GET_CODE (dst) == SUBREG)
50038 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50039 dst = SUBREG_REG (dst);
50042 if (GET_CODE (src) == SUBREG)
50043 src = SUBREG_REG (src);
50045 switch (GET_MODE (dst))
50047 case V16QImode:
50048 case V8HImode:
50049 case V4SImode:
50050 case V2DImode:
50052 machine_mode srcmode, dstmode;
50053 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50055 srcmode = mode_for_size (size, MODE_INT, 0);
50057 switch (srcmode)
50059 case QImode:
50060 if (!TARGET_SSE4_1)
50061 return false;
50062 dstmode = V16QImode;
50063 pinsr = gen_sse4_1_pinsrb;
50064 break;
50066 case HImode:
50067 if (!TARGET_SSE2)
50068 return false;
50069 dstmode = V8HImode;
50070 pinsr = gen_sse2_pinsrw;
50071 break;
50073 case SImode:
50074 if (!TARGET_SSE4_1)
50075 return false;
50076 dstmode = V4SImode;
50077 pinsr = gen_sse4_1_pinsrd;
50078 break;
50080 case DImode:
50081 gcc_assert (TARGET_64BIT);
50082 if (!TARGET_SSE4_1)
50083 return false;
50084 dstmode = V2DImode;
50085 pinsr = gen_sse4_1_pinsrq;
50086 break;
50088 default:
50089 return false;
50092 rtx d = dst;
50093 if (GET_MODE (dst) != dstmode)
50094 d = gen_reg_rtx (dstmode);
50095 src = gen_lowpart (srcmode, src);
50097 pos /= size;
50099 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50100 GEN_INT (1 << pos)));
50101 if (d != dst)
50102 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50103 return true;
50106 default:
50107 return false;
50111 /* This function returns the calling-ABI-specific va_list type node.
50112 It returns the va_list type specific to FNDECL. */
50114 static tree
50115 ix86_fn_abi_va_list (tree fndecl)
50117 if (!TARGET_64BIT)
50118 return va_list_type_node;
50119 gcc_assert (fndecl != NULL_TREE);
50121 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50122 return ms_va_list_type_node;
50123 else
50124 return sysv_va_list_type_node;
50127 /* Returns the canonical va_list type specified by TYPE. If there
50128 is no valid TYPE provided, it returns NULL_TREE. */
50130 static tree
50131 ix86_canonical_va_list_type (tree type)
50133 tree wtype, htype;
50135 /* Resolve references and pointers to va_list type. */
50136 if (TREE_CODE (type) == MEM_REF)
50137 type = TREE_TYPE (type);
50138 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50139 type = TREE_TYPE (type);
50140 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50141 type = TREE_TYPE (type);
50143 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50145 wtype = va_list_type_node;
50146 gcc_assert (wtype != NULL_TREE);
50147 htype = type;
50148 if (TREE_CODE (wtype) == ARRAY_TYPE)
50150 /* If va_list is an array type, the argument may have decayed
50151 to a pointer type, e.g. by being passed to another function.
50152 In that case, unwrap both types so that we can compare the
50153 underlying records. */
50154 if (TREE_CODE (htype) == ARRAY_TYPE
50155 || POINTER_TYPE_P (htype))
50157 wtype = TREE_TYPE (wtype);
50158 htype = TREE_TYPE (htype);
50161 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50162 return va_list_type_node;
50163 wtype = sysv_va_list_type_node;
50164 gcc_assert (wtype != NULL_TREE);
50165 htype = type;
50166 if (TREE_CODE (wtype) == ARRAY_TYPE)
50168 /* If va_list is an array type, the argument may have decayed
50169 to a pointer type, e.g. by being passed to another function.
50170 In that case, unwrap both types so that we can compare the
50171 underlying records. */
50172 if (TREE_CODE (htype) == ARRAY_TYPE
50173 || POINTER_TYPE_P (htype))
50175 wtype = TREE_TYPE (wtype);
50176 htype = TREE_TYPE (htype);
50179 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50180 return sysv_va_list_type_node;
50181 wtype = ms_va_list_type_node;
50182 gcc_assert (wtype != NULL_TREE);
50183 htype = type;
50184 if (TREE_CODE (wtype) == ARRAY_TYPE)
50186 /* If va_list is an array type, the argument may have decayed
50187 to a pointer type, e.g. by being passed to another function.
50188 In that case, unwrap both types so that we can compare the
50189 underlying records. */
50190 if (TREE_CODE (htype) == ARRAY_TYPE
50191 || POINTER_TYPE_P (htype))
50193 wtype = TREE_TYPE (wtype);
50194 htype = TREE_TYPE (htype);
50197 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50198 return ms_va_list_type_node;
50199 return NULL_TREE;
50201 return std_canonical_va_list_type (type);
50204 /* Iterate through the target-specific builtin types for va_list.
50205 IDX denotes the iterator, *PTREE is set to the result type of
50206 the va_list builtin, and *PNAME to its internal type.
50207 Returns zero if there is no element for this index, otherwise
50208 IDX should be increased upon the next call.
50209 Note, do not iterate a base builtin's name like __builtin_va_list.
50210 Used from c_common_nodes_and_builtins. */
50212 static int
50213 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50215 if (TARGET_64BIT)
50217 switch (idx)
50219 default:
50220 break;
50222 case 0:
50223 *ptree = ms_va_list_type_node;
50224 *pname = "__builtin_ms_va_list";
50225 return 1;
50227 case 1:
50228 *ptree = sysv_va_list_type_node;
50229 *pname = "__builtin_sysv_va_list";
50230 return 1;
50234 return 0;
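/* Illustrative note (typical usage, not part of the hooks above): on
   64-bit ELF targets both va_list flavours can coexist in a single
   translation unit; the ms_abi attribute selects the MS-ABI variant:

     void sysv_fn (int n, ...);                         uses __builtin_sysv_va_list
     void ms_fn (int n, ...) __attribute__((ms_abi));   uses __builtin_ms_va_list

   ix86_fn_abi_va_list picks the node per function and
   ix86_canonical_va_list_type maps a user-visible type back to one of
   the two builtin nodes (or to the single va_list on 32-bit targets).  */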
50237 #undef TARGET_SCHED_DISPATCH
50238 #define TARGET_SCHED_DISPATCH has_dispatch
50239 #undef TARGET_SCHED_DISPATCH_DO
50240 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50241 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50242 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50243 #undef TARGET_SCHED_REORDER
50244 #define TARGET_SCHED_REORDER ix86_sched_reorder
50245 #undef TARGET_SCHED_ADJUST_PRIORITY
50246 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50247 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50248 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50249 ix86_dependencies_evaluation_hook
50251 /* The size of the dispatch window is the total number of bytes of
50252 object code allowed in a window. */
50253 #define DISPATCH_WINDOW_SIZE 16
50255 /* Number of dispatch windows considered for scheduling. */
50256 #define MAX_DISPATCH_WINDOWS 3
50258 /* Maximum number of instructions in a window. */
50259 #define MAX_INSN 4
50261 /* Maximum number of immediate operands in a window. */
50262 #define MAX_IMM 4
50264 /* Maximum number of immediate bits allowed in a window. */
50265 #define MAX_IMM_SIZE 128
50267 /* Maximum number of 32 bit immediates allowed in a window. */
50268 #define MAX_IMM_32 4
50270 /* Maximum number of 64 bit immediates allowed in a window. */
50271 #define MAX_IMM_64 2
50273 /* Maximum total of loads or prefetches allowed in a window. */
50274 #define MAX_LOAD 2
50276 /* Maximum total of stores allowed in a window. */
50277 #define MAX_STORE 1
50279 #undef BIG
50280 #define BIG 100
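/* Worked example of the limits above (illustrative only): a window can
   hold at most MAX_INSN (4) instructions, MAX_LOAD (2) loads,
   MAX_STORE (1) store and MAX_IMM (4) immediate operands.  Two 64-bit
   immediates alone account for 2 * 64 = 128 immediate bits, i.e.
   MAX_IMM_SIZE, and also reach MAX_IMM_64, so adding another
   immediate-carrying instruction would exceed the limits and the
   instruction is treated as not fitting the window.  */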
50283 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50284 enum dispatch_group {
50285 disp_no_group = 0,
50286 disp_load,
50287 disp_store,
50288 disp_load_store,
50289 disp_prefetch,
50290 disp_imm,
50291 disp_imm_32,
50292 disp_imm_64,
50293 disp_branch,
50294 disp_cmp,
50295 disp_jcc,
50296 disp_last
50299 /* Number of allowable groups in a dispatch window. It is an array
50300 indexed by dispatch_group enum. 100 is used as a big number,
50301 because the number of these kinds of operations does not have any
50302 effect in the dispatch window, but we need them for other reasons in
50303 the table. */
50304 static unsigned int num_allowable_groups[disp_last] = {
50305 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50308 char group_name[disp_last + 1][16] = {
50309 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50310 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50311 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50314 /* Instruction path. */
50315 enum insn_path {
50316 no_path = 0,
50317 path_single, /* Single micro op. */
50318 path_double, /* Double micro op. */
50319 path_multi, /* Instructions with more than 2 micro ops. */
50320 last_path
50323 /* sched_insn_info defines a window to the instructions scheduled in
50324 the basic block. It contains a pointer to the insn_info table and
50325 the instruction scheduled.
50327 Windows are allocated for each basic block and are linked
50328 together. */
50329 typedef struct sched_insn_info_s {
50330 rtx insn;
50331 enum dispatch_group group;
50332 enum insn_path path;
50333 int byte_len;
50334 int imm_bytes;
50335 } sched_insn_info;
50337 /* Linked list of dispatch windows. This is a two way list of
50338 dispatch windows of a basic block. It contains information about
50339 the number of uops in the window and the total number of
50340 instructions and of bytes in the object code for this dispatch
50341 window. */
50342 typedef struct dispatch_windows_s {
50343 int num_insn; /* Number of insn in the window. */
50344 int num_uops; /* Number of uops in the window. */
50345 int window_size; /* Number of bytes in the window. */
50346 int window_num; /* Window number, either 0 or 1. */
50347 int num_imm; /* Number of immediates in an insn. */
50348 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50349 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50350 int imm_size; /* Total size of immediates in the window. */
50351 int num_loads; /* Total memory loads in the window. */
50352 int num_stores; /* Total memory stores in the window. */
50353 int violation; /* Violation exists in window. */
50354 sched_insn_info *window; /* Pointer to the window. */
50355 struct dispatch_windows_s *next;
50356 struct dispatch_windows_s *prev;
50357 } dispatch_windows;
50359 /* Immediate values used in an insn. */
50360 typedef struct imm_info_s
50362 int imm;
50363 int imm32;
50364 int imm64;
50365 } imm_info;
50367 static dispatch_windows *dispatch_window_list;
50368 static dispatch_windows *dispatch_window_list1;
50370 /* Get dispatch group of insn. */
50372 static enum dispatch_group
50373 get_mem_group (rtx_insn *insn)
50375 enum attr_memory memory;
50377 if (INSN_CODE (insn) < 0)
50378 return disp_no_group;
50379 memory = get_attr_memory (insn);
50380 if (memory == MEMORY_STORE)
50381 return disp_store;
50383 if (memory == MEMORY_LOAD)
50384 return disp_load;
50386 if (memory == MEMORY_BOTH)
50387 return disp_load_store;
50389 return disp_no_group;
50392 /* Return true if insn is a compare instruction. */
50394 static bool
50395 is_cmp (rtx_insn *insn)
50397 enum attr_type type;
50399 type = get_attr_type (insn);
50400 return (type == TYPE_TEST
50401 || type == TYPE_ICMP
50402 || type == TYPE_FCMP
50403 || GET_CODE (PATTERN (insn)) == COMPARE);
50406 /* Return true if a dispatch violation encountered. */
50408 static bool
50409 dispatch_violation (void)
50411 if (dispatch_window_list->next)
50412 return dispatch_window_list->next->violation;
50413 return dispatch_window_list->violation;
50416 /* Return true if insn is a branch instruction. */
50418 static bool
50419 is_branch (rtx_insn *insn)
50421 return (CALL_P (insn) || JUMP_P (insn));
50424 /* Return true if insn is a prefetch instruction. */
50426 static bool
50427 is_prefetch (rtx_insn *insn)
50429 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50432 /* This function initializes a dispatch window and the list container holding a
50433 pointer to the window. */
50435 static void
50436 init_window (int window_num)
50438 int i;
50439 dispatch_windows *new_list;
50441 if (window_num == 0)
50442 new_list = dispatch_window_list;
50443 else
50444 new_list = dispatch_window_list1;
50446 new_list->num_insn = 0;
50447 new_list->num_uops = 0;
50448 new_list->window_size = 0;
50449 new_list->next = NULL;
50450 new_list->prev = NULL;
50451 new_list->window_num = window_num;
50452 new_list->num_imm = 0;
50453 new_list->num_imm_32 = 0;
50454 new_list->num_imm_64 = 0;
50455 new_list->imm_size = 0;
50456 new_list->num_loads = 0;
50457 new_list->num_stores = 0;
50458 new_list->violation = false;
50460 for (i = 0; i < MAX_INSN; i++)
50462 new_list->window[i].insn = NULL;
50463 new_list->window[i].group = disp_no_group;
50464 new_list->window[i].path = no_path;
50465 new_list->window[i].byte_len = 0;
50466 new_list->window[i].imm_bytes = 0;
50468 return;
50471 /* This function allocates and initializes a dispatch window and the
50472 list container holding a pointer to the window. */
50474 static dispatch_windows *
50475 allocate_window (void)
50477 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50478 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50480 return new_list;
50483 /* This routine initializes the dispatch scheduling information. It
50484 initiates building dispatch scheduler tables and constructs the
50485 first dispatch window. */
50487 static void
50488 init_dispatch_sched (void)
50490 /* Allocate a dispatch list and a window. */
50491 dispatch_window_list = allocate_window ();
50492 dispatch_window_list1 = allocate_window ();
50493 init_window (0);
50494 init_window (1);
50497 /* This function returns true if a branch is detected. End of a basic block
50498 does not have to be a branch, but here we assume only branches end a
50499 window. */
50501 static bool
50502 is_end_basic_block (enum dispatch_group group)
50504 return group == disp_branch;
50507 /* This function is called when the end of a window processing is reached. */
50509 static void
50510 process_end_window (void)
50512 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50513 if (dispatch_window_list->next)
50515 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50516 gcc_assert (dispatch_window_list->window_size
50517 + dispatch_window_list1->window_size <= 48);
50518 init_window (1);
50520 init_window (0);
50523 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50524 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50525 for 48 bytes of instructions. Note that these windows are not dispatch
50526 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50528 static dispatch_windows *
50529 allocate_next_window (int window_num)
50531 if (window_num == 0)
50533 if (dispatch_window_list->next)
50534 init_window (1);
50535 init_window (0);
50536 return dispatch_window_list;
50539 dispatch_window_list->next = dispatch_window_list1;
50540 dispatch_window_list1->prev = dispatch_window_list;
50542 return dispatch_window_list1;
50545 /* Compute number of immediate operands of an instruction. */
50547 static void
50548 find_constant (rtx in_rtx, imm_info *imm_values)
50550 if (INSN_P (in_rtx))
50551 in_rtx = PATTERN (in_rtx);
50552 subrtx_iterator::array_type array;
50553 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50554 if (const_rtx x = *iter)
50555 switch (GET_CODE (x))
50557 case CONST:
50558 case SYMBOL_REF:
50559 case CONST_INT:
50560 (imm_values->imm)++;
50561 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50562 (imm_values->imm32)++;
50563 else
50564 (imm_values->imm64)++;
50565 break;
50567 case CONST_DOUBLE:
50568 case CONST_WIDE_INT:
50569 (imm_values->imm)++;
50570 (imm_values->imm64)++;
50571 break;
50573 case CODE_LABEL:
50574 if (LABEL_KIND (x) == LABEL_NORMAL)
50576 (imm_values->imm)++;
50577 (imm_values->imm32)++;
50579 break;
50581 default:
50582 break;
50586 /* Return total size of immediate operands of an instruction along with number
50587 of corresponding immediate operands. It initializes its parameters to zero
50588 before calling FIND_CONSTANT.
50589 INSN is the input instruction. IMM is the total of immediates.
50590 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50591 bit immediates. */
50593 static int
50594 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
50596 imm_info imm_values = {0, 0, 0};
50598 find_constant (insn, &imm_values);
50599 *imm = imm_values.imm;
50600 *imm32 = imm_values.imm32;
50601 *imm64 = imm_values.imm64;
50602 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
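/* Illustrative example (not part of the code above): for insns like

     addl    $1234, 8(%rsp)
     movabsq $0x112233445566, %rax

   the first insn's immediate fits in 32 bits and counts 4 bytes
   towards its insn's total, the second is a 64-bit immediate and
   counts 8 bytes, matching the imm32 * 4 + imm64 * 8 total returned
   above.  */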
50605 /* This function indicates whether any operand of an instruction is an
50606 immediate. */
50608 static bool
50609 has_immediate (rtx_insn *insn)
50611 int num_imm_operand;
50612 int num_imm32_operand;
50613 int num_imm64_operand;
50615 if (insn)
50616 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50617 &num_imm64_operand);
50618 return false;
50621 /* Return single or double path for instructions. */
50623 static enum insn_path
50624 get_insn_path (rtx_insn *insn)
50626 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50628 if ((int)path == 0)
50629 return path_single;
50631 if ((int)path == 1)
50632 return path_double;
50634 return path_multi;
50637 /* Return insn dispatch group. */
50639 static enum dispatch_group
50640 get_insn_group (rtx_insn *insn)
50642 enum dispatch_group group = get_mem_group (insn);
50643 if (group)
50644 return group;
50646 if (is_branch (insn))
50647 return disp_branch;
50649 if (is_cmp (insn))
50650 return disp_cmp;
50652 if (has_immediate (insn))
50653 return disp_imm;
50655 if (is_prefetch (insn))
50656 return disp_prefetch;
50658 return disp_no_group;
50661 /* Count number of GROUP restricted instructions in a dispatch
50662 window WINDOW_LIST. */
50664 static int
50665 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50667 enum dispatch_group group = get_insn_group (insn);
50668 int imm_size;
50669 int num_imm_operand;
50670 int num_imm32_operand;
50671 int num_imm64_operand;
50673 if (group == disp_no_group)
50674 return 0;
50676 if (group == disp_imm)
50678 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50679 &num_imm64_operand);
50680 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50681 || num_imm_operand + window_list->num_imm > MAX_IMM
50682 || (num_imm32_operand > 0
50683 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50684 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50685 || (num_imm64_operand > 0
50686 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50687 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50688 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50689 && num_imm64_operand > 0
50690 && ((window_list->num_imm_64 > 0
50691 && window_list->num_insn >= 2)
50692 || window_list->num_insn >= 3)))
50693 return BIG;
50695 return 1;
50698 if ((group == disp_load_store
50699 && (window_list->num_loads >= MAX_LOAD
50700 || window_list->num_stores >= MAX_STORE))
50701 || ((group == disp_load
50702 || group == disp_prefetch)
50703 && window_list->num_loads >= MAX_LOAD)
50704 || (group == disp_store
50705 && window_list->num_stores >= MAX_STORE))
50706 return BIG;
50708 return 1;
50711 /* This function returns true if insn satisfies dispatch rules on the
50712 last window scheduled. */
50714 static bool
50715 fits_dispatch_window (rtx_insn *insn)
50717 dispatch_windows *window_list = dispatch_window_list;
50718 dispatch_windows *window_list_next = dispatch_window_list->next;
50719 unsigned int num_restrict;
50720 enum dispatch_group group = get_insn_group (insn);
50721 enum insn_path path = get_insn_path (insn);
50722 int sum;
50724 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50725 instructions should be given the lowest priority in the
50726 scheduling process in Haifa scheduler to make sure they will be
50727 scheduled in the same dispatch window as the reference to them. */
50728 if (group == disp_jcc || group == disp_cmp)
50729 return false;
50731 /* Check nonrestricted. */
50732 if (group == disp_no_group || group == disp_branch)
50733 return true;
50735 /* Get last dispatch window. */
50736 if (window_list_next)
50737 window_list = window_list_next;
50739 if (window_list->window_num == 1)
50741 sum = window_list->prev->window_size + window_list->window_size;
50743 if (sum == 32
50744 || (min_insn_size (insn) + sum) >= 48)
50745 /* Window 1 is full. Go for next window. */
50746 return true;
50749 num_restrict = count_num_restricted (insn, window_list);
50751 if (num_restrict > num_allowable_groups[group])
50752 return false;
50754 /* See if it fits in the first window. */
50755 if (window_list->window_num == 0)
50757 /* The first window should have only single and double path
50758 uops. */
50759 if (path == path_double
50760 && (window_list->num_uops + 2) > MAX_INSN)
50761 return false;
50762 else if (path != path_single)
50763 return false;
50765 return true;
50768 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50769 dispatch window WINDOW_LIST. */
50771 static void
50772 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50774 int byte_len = min_insn_size (insn);
50775 int num_insn = window_list->num_insn;
50776 int imm_size;
50777 sched_insn_info *window = window_list->window;
50778 enum dispatch_group group = get_insn_group (insn);
50779 enum insn_path path = get_insn_path (insn);
50780 int num_imm_operand;
50781 int num_imm32_operand;
50782 int num_imm64_operand;
50784 if (!window_list->violation && group != disp_cmp
50785 && !fits_dispatch_window (insn))
50786 window_list->violation = true;
50788 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50789 &num_imm64_operand);
50791 /* Initialize window with new instruction. */
50792 window[num_insn].insn = insn;
50793 window[num_insn].byte_len = byte_len;
50794 window[num_insn].group = group;
50795 window[num_insn].path = path;
50796 window[num_insn].imm_bytes = imm_size;
50798 window_list->window_size += byte_len;
50799 window_list->num_insn = num_insn + 1;
50800 window_list->num_uops = window_list->num_uops + num_uops;
50801 window_list->imm_size += imm_size;
50802 window_list->num_imm += num_imm_operand;
50803 window_list->num_imm_32 += num_imm32_operand;
50804 window_list->num_imm_64 += num_imm64_operand;
50806 if (group == disp_store)
50807 window_list->num_stores += 1;
50808 else if (group == disp_load
50809 || group == disp_prefetch)
50810 window_list->num_loads += 1;
50811 else if (group == disp_load_store)
50813 window_list->num_stores += 1;
50814 window_list->num_loads += 1;
50818 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50819 If the total bytes of instructions or the number of instructions in
50820 the window exceeds the allowed maximum, it allocates a new window. */
50822 static void
50823 add_to_dispatch_window (rtx_insn *insn)
50825 int byte_len;
50826 dispatch_windows *window_list;
50827 dispatch_windows *next_list;
50828 dispatch_windows *window0_list;
50829 enum insn_path path;
50830 enum dispatch_group insn_group;
50831 bool insn_fits;
50832 int num_insn;
50833 int num_uops;
50834 int window_num;
50835 int insn_num_uops;
50836 int sum;
50838 if (INSN_CODE (insn) < 0)
50839 return;
50841 byte_len = min_insn_size (insn);
50842 window_list = dispatch_window_list;
50843 next_list = window_list->next;
50844 path = get_insn_path (insn);
50845 insn_group = get_insn_group (insn);
50847 /* Get the last dispatch window. */
50848 if (next_list)
50849 window_list = dispatch_window_list->next;
50851 if (path == path_single)
50852 insn_num_uops = 1;
50853 else if (path == path_double)
50854 insn_num_uops = 2;
50855 else
50856 insn_num_uops = (int) path;
50858 /* If current window is full, get a new window.
50859 Window number zero is full, if MAX_INSN uops are scheduled in it.
50860 Window number one is full, if window zero's bytes plus window
50861 one's bytes is 32, or if the bytes of the new instruction added
50862 to the total makes it greater than 48, or it has already MAX_INSN
50863 instructions in it. */
50864 num_insn = window_list->num_insn;
50865 num_uops = window_list->num_uops;
50866 window_num = window_list->window_num;
50867 insn_fits = fits_dispatch_window (insn);
50869 if (num_insn >= MAX_INSN
50870 || num_uops + insn_num_uops > MAX_INSN
50871 || !(insn_fits))
50873 window_num = ~window_num & 1;
50874 window_list = allocate_next_window (window_num);
50877 if (window_num == 0)
50879 add_insn_window (insn, window_list, insn_num_uops);
50880 if (window_list->num_insn >= MAX_INSN
50881 && insn_group == disp_branch)
50883 process_end_window ();
50884 return;
50887 else if (window_num == 1)
50889 window0_list = window_list->prev;
50890 sum = window0_list->window_size + window_list->window_size;
50891 if (sum == 32
50892 || (byte_len + sum) >= 48)
50894 process_end_window ();
50895 window_list = dispatch_window_list;
50898 add_insn_window (insn, window_list, insn_num_uops);
50900 else
50901 gcc_unreachable ();
50903 if (is_end_basic_block (insn_group))
50905 /* End of basic block is reached; do end-of-basic-block processing. */
50906 process_end_window ();
50907 return;
50911 /* Print the dispatch window, WINDOW_NUM, to FILE. */
50913 DEBUG_FUNCTION static void
50914 debug_dispatch_window_file (FILE *file, int window_num)
50916 dispatch_windows *list;
50917 int i;
50919 if (window_num == 0)
50920 list = dispatch_window_list;
50921 else
50922 list = dispatch_window_list1;
50924 fprintf (file, "Window #%d:\n", list->window_num);
50925 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
50926 list->num_insn, list->num_uops, list->window_size);
50927 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50928 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
50930 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
50931 list->num_stores);
50932 fprintf (file, " insn info:\n");
50934 for (i = 0; i < MAX_INSN; i++)
50936 if (!list->window[i].insn)
50937 break;
50938 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
50939 i, group_name[list->window[i].group],
50940 i, (void *)list->window[i].insn,
50941 i, list->window[i].path,
50942 i, list->window[i].byte_len,
50943 i, list->window[i].imm_bytes);
50947 /* Print to stdout a dispatch window. */
50949 DEBUG_FUNCTION void
50950 debug_dispatch_window (int window_num)
50952 debug_dispatch_window_file (stdout, window_num);
50955 /* Print INSN dispatch information to FILE. */
50957 DEBUG_FUNCTION static void
50958 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
50960 int byte_len;
50961 enum insn_path path;
50962 enum dispatch_group group;
50963 int imm_size;
50964 int num_imm_operand;
50965 int num_imm32_operand;
50966 int num_imm64_operand;
50968 if (INSN_CODE (insn) < 0)
50969 return;
50971 byte_len = min_insn_size (insn);
50972 path = get_insn_path (insn);
50973 group = get_insn_group (insn);
50974 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50975 &num_imm64_operand);
50977 fprintf (file, " insn info:\n");
50978 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
50979 group_name[group], path, byte_len);
50980 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
50981 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
50984 /* Print to STDERR the status of the ready list with respect to
50985 dispatch windows. */
50987 DEBUG_FUNCTION void
50988 debug_ready_dispatch (void)
50990 int i;
50991 int no_ready = number_in_ready ();
50993 fprintf (stdout, "Number of ready: %d\n", no_ready);
50995 for (i = 0; i < no_ready; i++)
50996 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
50999 /* This routine is the driver of the dispatch scheduler. */
51001 static void
51002 do_dispatch (rtx_insn *insn, int mode)
51004 if (mode == DISPATCH_INIT)
51005 init_dispatch_sched ();
51006 else if (mode == ADD_TO_DISPATCH_WINDOW)
51007 add_to_dispatch_window (insn);
51010 /* Return TRUE if Dispatch Scheduling is supported. */
51012 static bool
51013 has_dispatch (rtx_insn *insn, int action)
51015 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51016 && flag_dispatch_scheduler)
51017 switch (action)
51019 default:
51020 return false;
51022 case IS_DISPATCH_ON:
51023 return true;
51024 break;
51026 case IS_CMP:
51027 return is_cmp (insn);
51029 case DISPATCH_VIOLATION:
51030 return dispatch_violation ();
51032 case FITS_DISPATCH_WINDOW:
51033 return fits_dispatch_window (insn);
51036 return false;
51039 /* Implementation of reassociation_width target hook used by
51040 reassoc phase to identify parallelism level in reassociated
51041 tree. The statement's tree_code is passed in OPC. The argument type
51042 is passed in MODE.
51044 Currently parallel reassociation is enabled for Atom
51045 processors only and we set reassociation width to be 2
51046 because Atom may issue up to 2 instructions per cycle.
51048 Return value should be fixed if parallel reassociation is
51049 enabled for other processors. */
51051 static int
51052 ix86_reassociation_width (unsigned int, machine_mode mode)
51054 /* Vector part. */
51055 if (VECTOR_MODE_P (mode))
51057 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51058 return 2;
51059 else
51060 return 1;
51063 /* Scalar part. */
51064 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51065 return 2;
51066 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51067 return 2;
51068 else
51069 return 1;
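/* Illustrative example (not part of the hook above): with a
   reassociation width of 2 the reassoc pass may rewrite the serial
   chain

     s = ((a + b) + c) + d;

   into the shallower form

     s = (a + b) + (c + d);

   so that two of the additions can execute in parallel on a 2-wide
   unit.  */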
51072 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51073 place emms and femms instructions. */
51075 static machine_mode
51076 ix86_preferred_simd_mode (machine_mode mode)
51078 if (!TARGET_SSE)
51079 return word_mode;
51081 switch (mode)
51083 case QImode:
51084 return TARGET_AVX512BW ? V64QImode :
51085 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51086 case HImode:
51087 return TARGET_AVX512BW ? V32HImode :
51088 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51089 case SImode:
51090 return TARGET_AVX512F ? V16SImode :
51091 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51092 case DImode:
51093 return TARGET_AVX512F ? V8DImode :
51094 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51096 case SFmode:
51097 if (TARGET_AVX512F)
51098 return V16SFmode;
51099 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51100 return V8SFmode;
51101 else
51102 return V4SFmode;
51104 case DFmode:
51105 if (!TARGET_VECTORIZE_DOUBLE)
51106 return word_mode;
51107 else if (TARGET_AVX512F)
51108 return V8DFmode;
51109 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51110 return V4DFmode;
51111 else if (TARGET_SSE2)
51112 return V2DFmode;
51113 /* FALLTHRU */
51115 default:
51116 return word_mode;
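/* Illustrative example (not part of the hook above): for a simple
   float loop such as

     void f (float *a, float *b, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] += b[i];
     }

   this hook makes the vectorizer prefer V4SFmode under plain SSE,
   V8SFmode under -mavx (unless 128-bit vectors are preferred) and
   V16SFmode under -mavx512f.  */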
51120 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51121 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51122 256bit and 128bit vectors. */
51124 static unsigned int
51125 ix86_autovectorize_vector_sizes (void)
51127 return TARGET_AVX512F ? 64 | 32 | 16 :
51128 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51133 /* Return class of registers which could be used for pseudo of MODE
51134 and of class RCLASS for spilling instead of memory. Return NO_REGS
51135 if it is not possible or non-profitable. */
51136 static reg_class_t
51137 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51139 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51140 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51141 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51142 return ALL_SSE_REGS;
51143 return NO_REGS;
51146 /* Implement targetm.vectorize.init_cost. */
51148 static void *
51149 ix86_init_cost (struct loop *)
51151 unsigned *cost = XNEWVEC (unsigned, 3);
51152 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51153 return cost;
51156 /* Implement targetm.vectorize.add_stmt_cost. */
51158 static unsigned
51159 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51160 struct _stmt_vec_info *stmt_info, int misalign,
51161 enum vect_cost_model_location where)
51163 unsigned *cost = (unsigned *) data;
51164 unsigned retval = 0;
51166 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51167 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51169 /* Statements in an inner loop relative to the loop being
51170 vectorized are weighted more heavily. The value here is
51171 arbitrary and could potentially be improved with analysis. */
51172 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51173 count *= 50; /* FIXME. */
51175 retval = (unsigned) (count * stmt_cost);
51177 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51178 for Silvermont as it has an out-of-order integer pipeline and can execute
51179 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51180 if (TARGET_SILVERMONT || TARGET_INTEL)
51181 if (stmt_info && stmt_info->stmt)
51183 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51184 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51185 retval = (retval * 17) / 10;
51188 cost[where] += retval;
51190 return retval;
51193 /* Implement targetm.vectorize.finish_cost. */
51195 static void
51196 ix86_finish_cost (void *data, unsigned *prologue_cost,
51197 unsigned *body_cost, unsigned *epilogue_cost)
51199 unsigned *cost = (unsigned *) data;
51200 *prologue_cost = cost[vect_prologue];
51201 *body_cost = cost[vect_body];
51202 *epilogue_cost = cost[vect_epilogue];
51205 /* Implement targetm.vectorize.destroy_cost_data. */
51207 static void
51208 ix86_destroy_cost_data (void *data)
51210 free (data);
51213 /* Validate target specific memory model bits in VAL. */
51215 static unsigned HOST_WIDE_INT
51216 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51218 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51219 bool strong;
51221 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51222 |MEMMODEL_MASK)
51223 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51225 warning (OPT_Winvalid_memory_model,
51226 "Unknown architecture specific memory model");
51227 return MEMMODEL_SEQ_CST;
51229 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51230 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51232 warning (OPT_Winvalid_memory_model,
51233 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51234 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51236 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51238 warning (OPT_Winvalid_memory_model,
51239 "HLE_RELEASE not used with RELEASE or stronger memory model");
51240 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51242 return val;
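/* Illustrative usage of the HLE bits validated above (example code,
   not part of this file; it relies on the __ATOMIC_HLE_* macros being
   defined when HLE is enabled):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ... critical section ...
     __atomic_store_n (&lock, 0,
                       __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_ACQUIRE with a model weaker than ACQUIRE, or
   IX86_HLE_RELEASE with a model weaker than RELEASE, triggers the
   warnings above and falls back to a sequentially consistent model.  */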
51245 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51246 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51247 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51248 or number of vecsize_mangle variants that should be emitted. */
51250 static int
51251 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51252 struct cgraph_simd_clone *clonei,
51253 tree base_type, int num)
51255 int ret = 1;
51257 if (clonei->simdlen
51258 && (clonei->simdlen < 2
51259 || clonei->simdlen > 16
51260 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51262 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51263 "unsupported simdlen %d", clonei->simdlen);
51264 return 0;
51267 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51268 if (TREE_CODE (ret_type) != VOID_TYPE)
51269 switch (TYPE_MODE (ret_type))
51271 case QImode:
51272 case HImode:
51273 case SImode:
51274 case DImode:
51275 case SFmode:
51276 case DFmode:
51277 /* case SCmode: */
51278 /* case DCmode: */
51279 break;
51280 default:
51281 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51282 "unsupported return type %qT for simd\n", ret_type);
51283 return 0;
51286 tree t;
51287 int i;
51289 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51290 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51291 switch (TYPE_MODE (TREE_TYPE (t)))
51293 case QImode:
51294 case HImode:
51295 case SImode:
51296 case DImode:
51297 case SFmode:
51298 case DFmode:
51299 /* case SCmode: */
51300 /* case DCmode: */
51301 break;
51302 default:
51303 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51304 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51305 return 0;
51308 if (clonei->cilk_elemental)
51310 /* Parse the processor clause here. If not present, default to 'b'. */
51311 clonei->vecsize_mangle = 'b';
51313 else if (!TREE_PUBLIC (node->decl))
51315 /* If the function isn't exported, we can pick up just one ISA
51316 for the clones. */
51317 if (TARGET_AVX2)
51318 clonei->vecsize_mangle = 'd';
51319 else if (TARGET_AVX)
51320 clonei->vecsize_mangle = 'c';
51321 else
51322 clonei->vecsize_mangle = 'b';
51323 ret = 1;
51325 else
51327 clonei->vecsize_mangle = "bcd"[num];
51328 ret = 3;
51330 switch (clonei->vecsize_mangle)
51332 case 'b':
51333 clonei->vecsize_int = 128;
51334 clonei->vecsize_float = 128;
51335 break;
51336 case 'c':
51337 clonei->vecsize_int = 128;
51338 clonei->vecsize_float = 256;
51339 break;
51340 case 'd':
51341 clonei->vecsize_int = 256;
51342 clonei->vecsize_float = 256;
51343 break;
51345 if (clonei->simdlen == 0)
51347 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51348 clonei->simdlen = clonei->vecsize_int;
51349 else
51350 clonei->simdlen = clonei->vecsize_float;
51351 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51352 if (clonei->simdlen > 16)
51353 clonei->simdlen = 16;
51355 return ret;
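/* Illustrative example (not part of the hook above): for an exported
   function declared as

     #pragma omp declare simd
     double dot (double x, double y);

   the hook asks for all three vecsize_mangle variants 'b', 'c' and 'd'
   (matched to sse2, avx and avx2 in ix86_simd_clone_adjust), whereas a
   function with internal linkage only gets the single variant the
   current ISA can use.  */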
51358 /* Add target attribute to SIMD clone NODE if needed. */
51360 static void
51361 ix86_simd_clone_adjust (struct cgraph_node *node)
51363 const char *str = NULL;
51364 gcc_assert (node->decl == cfun->decl);
51365 switch (node->simdclone->vecsize_mangle)
51367 case 'b':
51368 if (!TARGET_SSE2)
51369 str = "sse2";
51370 break;
51371 case 'c':
51372 if (!TARGET_AVX)
51373 str = "avx";
51374 break;
51375 case 'd':
51376 if (!TARGET_AVX2)
51377 str = "avx2";
51378 break;
51379 default:
51380 gcc_unreachable ();
51382 if (str == NULL)
51383 return;
51384 push_cfun (NULL);
51385 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51386 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51387 gcc_assert (ok);
51388 pop_cfun ();
51389 ix86_reset_previous_fndecl ();
51390 ix86_set_current_function (node->decl);
51393 /* If SIMD clone NODE can't be used in a vectorized loop
51394 in current function, return -1, otherwise return a badness of using it
51395 (0 if it is most desirable from vecsize_mangle point of view, 1
51396 slightly less desirable, etc.). */
51398 static int
51399 ix86_simd_clone_usable (struct cgraph_node *node)
51401 switch (node->simdclone->vecsize_mangle)
51403 case 'b':
51404 if (!TARGET_SSE2)
51405 return -1;
51406 if (!TARGET_AVX)
51407 return 0;
51408 return TARGET_AVX2 ? 2 : 1;
51409 case 'c':
51410 if (!TARGET_AVX)
51411 return -1;
51412 return TARGET_AVX2 ? 1 : 0;
51413 break;
51414 case 'd':
51415 if (!TARGET_AVX2)
51416 return -1;
51417 return 0;
51418 default:
51419 gcc_unreachable ();
51423 /* This function adjusts the unroll factor based on
51424 the hardware capabilities. For example, bdver3 has
51425 a loop buffer which makes unrolling of smaller
51426 loops less important. This function decides the
51427 unroll factor using the number of memory references
51428 in the loop body (the constant 32 is used as a cap) as a heuristic.
51430 static unsigned
51431 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51433 basic_block *bbs;
51434 rtx_insn *insn;
51435 unsigned i;
51436 unsigned mem_count = 0;
51438 if (!TARGET_ADJUST_UNROLL)
51439 return nunroll;
51441 /* Count the number of memory references within the loop body.
51442 This value determines the unrolling factor for bdver3 and bdver4
51443 architectures. */
51444 subrtx_iterator::array_type array;
51445 bbs = get_loop_body (loop);
51446 for (i = 0; i < loop->num_nodes; i++)
51447 FOR_BB_INSNS (bbs[i], insn)
51448 if (NONDEBUG_INSN_P (insn))
51449 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51450 if (const_rtx x = *iter)
51451 if (MEM_P (x))
51453 machine_mode mode = GET_MODE (x);
51454 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51455 if (n_words > 4)
51456 mem_count += 2;
51457 else
51458 mem_count += 1;
51460 free (bbs);
51462 if (mem_count && mem_count <= 32)
51463 return 32 / mem_count;
51465 return nunroll;
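/* Worked example of the heuristic above (illustrative only): a loop
   body whose counted memory references sum to 8 makes the hook return
   32 / 8 = 4, a reference wider than four words counting twice; a loop
   with more than 32 counted references keeps the caller-supplied
   NUNROLL unchanged.  */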
51469 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51471 static bool
51472 ix86_float_exceptions_rounding_supported_p (void)
51474 /* For x87 floating point with standard excess precision handling,
51475 there is no adddf3 pattern (since x87 floating point only has
51476 XFmode operations) so the default hook implementation gets this
51477 wrong. */
51478 return TARGET_80387 || TARGET_SSE_MATH;
51481 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51483 static void
51484 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51486 if (!TARGET_80387 && !TARGET_SSE_MATH)
51487 return;
51488 tree exceptions_var = create_tmp_var (integer_type_node);
51489 if (TARGET_80387)
51491 tree fenv_index_type = build_index_type (size_int (6));
51492 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51493 tree fenv_var = create_tmp_var (fenv_type);
51494 mark_addressable (fenv_var);
51495 tree fenv_ptr = build_pointer_type (fenv_type);
51496 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51497 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51498 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51499 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51500 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51501 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51502 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51503 tree hold_fnclex = build_call_expr (fnclex, 0);
51504 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51505 hold_fnclex);
51506 *clear = build_call_expr (fnclex, 0);
51507 tree sw_var = create_tmp_var (short_unsigned_type_node);
51508 tree fnstsw_call = build_call_expr (fnstsw, 0);
51509 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51510 sw_var, fnstsw_call);
51511 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51512 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51513 exceptions_var, exceptions_x87);
51514 *update = build2 (COMPOUND_EXPR, integer_type_node,
51515 sw_mod, update_mod);
51516 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51517 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51519 if (TARGET_SSE_MATH)
51521 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51522 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51523 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51524 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51525 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51526 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51527 mxcsr_orig_var, stmxcsr_hold_call);
51528 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51529 mxcsr_orig_var,
51530 build_int_cst (unsigned_type_node, 0x1f80));
51531 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51532 build_int_cst (unsigned_type_node, 0xffffffc0));
51533 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51534 mxcsr_mod_var, hold_mod_val);
51535 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51536 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51537 hold_assign_orig, hold_assign_mod);
51538 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51539 ldmxcsr_hold_call);
51540 if (*hold)
51541 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51542 else
51543 *hold = hold_all;
51544 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51545 if (*clear)
51546 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51547 ldmxcsr_clear_call);
51548 else
51549 *clear = ldmxcsr_clear_call;
51550 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51551 tree exceptions_sse = fold_convert (integer_type_node,
51552 stxmcsr_update_call);
51553 if (*update)
51555 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51556 exceptions_var, exceptions_sse);
51557 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51558 exceptions_var, exceptions_mod);
51559 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51560 exceptions_assign);
51562 else
51563 *update = build2 (MODIFY_EXPR, integer_type_node,
51564 exceptions_var, exceptions_sse);
51565 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51566 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51567 ldmxcsr_update_call);
51569 tree atomic_feraiseexcept
51570 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51571 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51572 1, exceptions_var);
51573 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51574 atomic_feraiseexcept_call);
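/* Illustrative context (an assumption about how the hook is used, not
   spelled out in this file): the *HOLD / *CLEAR / *UPDATE sequences
   built above bracket the compare-and-swap loop emitted for a C11
   atomic floating-point compound assignment, e.g.

     _Atomic double d;
     ...
     d += 1.0;

   HOLD saves the FP environment and masks/clears exceptions before
   the loop, CLEAR rearms it when a compare-and-swap iteration has to
   be retried, and UPDATE restores the saved environment and re-raises
   the recorded exceptions through __atomic_feraiseexcept once the
   store succeeds.  */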
51577 /* Return mode to be used for bounds or VOIDmode
51578 if bounds are not supported. */
51580 static enum machine_mode
51581 ix86_mpx_bound_mode ()
51583 /* Do not support pointer checker if MPX
51584 is not enabled. */
51585 if (!TARGET_MPX)
51587 if (flag_check_pointer_bounds)
51588 warning (0, "Pointer Checker requires MPX support on this target."
51589 " Use -mmpx options to enable MPX.");
51590 return VOIDmode;
51593 return BNDmode;
51596 /* Return constant used to statically initialize constant bounds.
51598 This function is used to create special bound values. For now
51599 only INIT bounds and NONE bounds are expected. More special
51600 values may be added later. */
51602 static tree
51603 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51605 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51606 : build_zero_cst (pointer_sized_int_node);
51607 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51608 : build_minus_one_cst (pointer_sized_int_node);
51610 /* This function is supposed to be used to create INIT and
51611 NONE bounds only. */
51612 gcc_assert ((lb == 0 && ub == -1)
51613 || (lb == -1 && ub == 0));
51615 return build_complex (NULL, low, high);
51618 /* Generate a list of statements STMTS to initialize pointer bounds
51619 variable VAR with bounds LB and UB. Return the number of generated
51620 statements. */
51622 static int
51623 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51625 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51626 tree lhs, modify, var_p;
51628 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51629 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51631 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51632 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51633 append_to_statement_list (modify, stmts);
51635 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51636 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51637 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51638 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51639 append_to_statement_list (modify, stmts);
51641 return 2;
51644 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
51645 /* For i386, a common symbol is local only for non-PIE binaries. For
51646 x86-64, a common symbol is local only for non-PIE binaries or when the
51647 linker supports copy relocs in PIE binaries. */
51649 static bool
51650 ix86_binds_local_p (const_tree exp)
51652 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
51653 (!flag_pic
51654 || (TARGET_64BIT
51655 && HAVE_LD_PIE_COPYRELOC != 0)));
51657 #endif
51659 /* If MEM is in the form of [base+offset], extract the two parts
51660 of the address into BASE and OFFSET and return true; otherwise return false. */
51662 static bool
51663 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
51665 rtx addr;
51667 gcc_assert (MEM_P (mem));
51669 addr = XEXP (mem, 0);
51671 if (GET_CODE (addr) == CONST)
51672 addr = XEXP (addr, 0);
51674 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
51676 *base = addr;
51677 *offset = const0_rtx;
51678 return true;
51681 if (GET_CODE (addr) == PLUS
51682 && (REG_P (XEXP (addr, 0))
51683 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
51684 && CONST_INT_P (XEXP (addr, 1)))
51686 *base = XEXP (addr, 0);
51687 *offset = XEXP (addr, 1);
51688 return true;
51691 return false;
51694 /* Given OPERANDS of consecutive load/store, check if we can merge
51695 them into move multiple. LOAD is true if they are load instructions.
51696 MODE is the mode of memory operands. */
51698 bool
51699 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
51700 enum machine_mode mode)
51702 HOST_WIDE_INT offval_1, offval_2, msize;
51703 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
51705 if (load)
51707 mem_1 = operands[1];
51708 mem_2 = operands[3];
51709 reg_1 = operands[0];
51710 reg_2 = operands[2];
51712 else
51714 mem_1 = operands[0];
51715 mem_2 = operands[2];
51716 reg_1 = operands[1];
51717 reg_2 = operands[3];
51720 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
51722 if (REGNO (reg_1) != REGNO (reg_2))
51723 return false;
51725 /* Check if the addresses are in the form of [base+offset]. */
51726 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
51727 return false;
51728 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
51729 return false;
51731 /* Check if the bases are the same. */
51732 if (!rtx_equal_p (base_1, base_2))
51733 return false;
51735 offval_1 = INTVAL (offset_1);
51736 offval_2 = INTVAL (offset_2);
51737 msize = GET_MODE_SIZE (mode);
51738 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
51739 if (offval_1 + msize != offval_2)
51740 return false;
51742 return true;
51745 /* Initialize the GCC target structure. */
51746 #undef TARGET_RETURN_IN_MEMORY
51747 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51749 #undef TARGET_LEGITIMIZE_ADDRESS
51750 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51752 #undef TARGET_ATTRIBUTE_TABLE
51753 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51754 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51755 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51756 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51757 # undef TARGET_MERGE_DECL_ATTRIBUTES
51758 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51759 #endif
51761 #undef TARGET_COMP_TYPE_ATTRIBUTES
51762 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51764 #undef TARGET_INIT_BUILTINS
51765 #define TARGET_INIT_BUILTINS ix86_init_builtins
51766 #undef TARGET_BUILTIN_DECL
51767 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51768 #undef TARGET_EXPAND_BUILTIN
51769 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51771 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51772 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51773 ix86_builtin_vectorized_function
51775 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51776 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51778 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51779 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51781 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51782 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51784 #undef TARGET_BUILTIN_RECIPROCAL
51785 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51787 #undef TARGET_ASM_FUNCTION_EPILOGUE
51788 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51790 #undef TARGET_ENCODE_SECTION_INFO
51791 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51792 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51793 #else
51794 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51795 #endif
51797 #undef TARGET_ASM_OPEN_PAREN
51798 #define TARGET_ASM_OPEN_PAREN ""
51799 #undef TARGET_ASM_CLOSE_PAREN
51800 #define TARGET_ASM_CLOSE_PAREN ""
51802 #undef TARGET_ASM_BYTE_OP
51803 #define TARGET_ASM_BYTE_OP ASM_BYTE
51805 #undef TARGET_ASM_ALIGNED_HI_OP
51806 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51807 #undef TARGET_ASM_ALIGNED_SI_OP
51808 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51809 #ifdef ASM_QUAD
51810 #undef TARGET_ASM_ALIGNED_DI_OP
51811 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51812 #endif
51814 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51815 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51817 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51818 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51820 #undef TARGET_ASM_UNALIGNED_HI_OP
51821 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51822 #undef TARGET_ASM_UNALIGNED_SI_OP
51823 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51824 #undef TARGET_ASM_UNALIGNED_DI_OP
51825 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51827 #undef TARGET_PRINT_OPERAND
51828 #define TARGET_PRINT_OPERAND ix86_print_operand
51829 #undef TARGET_PRINT_OPERAND_ADDRESS
51830 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51831 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51832 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51833 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51834 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51836 #undef TARGET_SCHED_INIT_GLOBAL
51837 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51838 #undef TARGET_SCHED_ADJUST_COST
51839 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51840 #undef TARGET_SCHED_ISSUE_RATE
51841 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51842 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51843 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51844 ia32_multipass_dfa_lookahead
51845 #undef TARGET_SCHED_MACRO_FUSION_P
51846 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51847 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51848 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51850 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51851 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51853 #undef TARGET_MEMMODEL_CHECK
51854 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51856 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51857 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51859 #ifdef HAVE_AS_TLS
51860 #undef TARGET_HAVE_TLS
51861 #define TARGET_HAVE_TLS true
51862 #endif
51863 #undef TARGET_CANNOT_FORCE_CONST_MEM
51864 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51865 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51866 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51868 #undef TARGET_DELEGITIMIZE_ADDRESS
51869 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51871 #undef TARGET_MS_BITFIELD_LAYOUT_P
51872 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51874 #if TARGET_MACHO
51875 #undef TARGET_BINDS_LOCAL_P
51876 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51877 #else
51878 #undef TARGET_BINDS_LOCAL_P
51879 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
51880 #endif
51881 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51882 #undef TARGET_BINDS_LOCAL_P
51883 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51884 #endif
51886 #undef TARGET_ASM_OUTPUT_MI_THUNK
51887 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51888 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51889 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51891 #undef TARGET_ASM_FILE_START
51892 #define TARGET_ASM_FILE_START x86_file_start
51894 #undef TARGET_OPTION_OVERRIDE
51895 #define TARGET_OPTION_OVERRIDE ix86_option_override
51897 #undef TARGET_REGISTER_MOVE_COST
51898 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51899 #undef TARGET_MEMORY_MOVE_COST
51900 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51901 #undef TARGET_RTX_COSTS
51902 #define TARGET_RTX_COSTS ix86_rtx_costs
51903 #undef TARGET_ADDRESS_COST
51904 #define TARGET_ADDRESS_COST ix86_address_cost
51906 #undef TARGET_FIXED_CONDITION_CODE_REGS
51907 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51908 #undef TARGET_CC_MODES_COMPATIBLE
51909 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51911 #undef TARGET_MACHINE_DEPENDENT_REORG
51912 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51914 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51915 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51917 #undef TARGET_BUILD_BUILTIN_VA_LIST
51918 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51920 #undef TARGET_FOLD_BUILTIN
51921 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
51923 #undef TARGET_COMPARE_VERSION_PRIORITY
51924 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51926 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51927 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51928 ix86_generate_version_dispatcher_body
51930 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51931 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51932 ix86_get_function_versions_dispatcher
51934 #undef TARGET_ENUM_VA_LIST_P
51935 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51937 #undef TARGET_FN_ABI_VA_LIST
51938 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51940 #undef TARGET_CANONICAL_VA_LIST_TYPE
51941 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51943 #undef TARGET_EXPAND_BUILTIN_VA_START
51944 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
51946 #undef TARGET_MD_ASM_CLOBBERS
51947 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51949 #undef TARGET_PROMOTE_PROTOTYPES
51950 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
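
/* Vectorizer hooks: cost-model queries and SIMD mode selection.  */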
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
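
/* SIMD clone hooks (vector variants of functions, e.g. "#pragma omp
   declare simd").  */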
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
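
/* Mode-switching hooks used by the optimize_mode_switching pass.  */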
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
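
/* Pointer Bounds Checker (Intel MPX) hooks.  */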
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
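
/* Build the vector of target hooks from the TARGET_* macros defined
   above.  */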
struct gcc_target targetm = TARGET_INITIALIZER;
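
/* Garbage-collection root tables generated by gengtype for this file.  */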
#include "gt-i386.h"