gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "hash-set.h"
26 #include "machmode.h"
27 #include "vec.h"
28 #include "double-int.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "wide-int.h"
33 #include "inchash.h"
34 #include "tree.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
37 #include "attribs.h"
38 #include "calls.h"
39 #include "stor-layout.h"
40 #include "varasm.h"
41 #include "tm_p.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
46 #include "output.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "except.h"
51 #include "function.h"
52 #include "recog.h"
53 #include "hashtab.h"
54 #include "statistics.h"
55 #include "real.h"
56 #include "fixed-value.h"
57 #include "expmed.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "emit-rtl.h"
61 #include "stmt.h"
62 #include "expr.h"
63 #include "optabs.h"
64 #include "diagnostic-core.h"
65 #include "toplev.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "ggc.h"
76 #include "target.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
80 #include "reload.h"
81 #include "hash-map.h"
82 #include "is-a.h"
83 #include "plugin-api.h"
84 #include "ipa-ref.h"
85 #include "cgraph.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "gimple.h"
93 #include "gimplify.h"
94 #include "cfgloop.h"
95 #include "dwarf2.h"
96 #include "df.h"
97 #include "tm-constrs.h"
98 #include "params.h"
99 #include "cselib.h"
100 #include "debug.h"
101 #include "sched-int.h"
102 #include "sbitmap.h"
103 #include "fibheap.h"
104 #include "opts.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
108 #include "context.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
119 static rtx legitimize_dllimport_symbol (rtx, bool);
120 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
121 static rtx legitimize_pe_coff_symbol (rtx, bool);
123 #ifndef CHECK_STACK_LIMIT
124 #define CHECK_STACK_LIMIT (-1)
125 #endif
127 /* Return index of given mode in mult and division cost tables. */
128 #define MODE_INDEX(mode) \
129 ((mode) == QImode ? 0 \
130 : (mode) == HImode ? 1 \
131 : (mode) == SImode ? 2 \
132 : (mode) == DImode ? 3 \
133 : 4)
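/* Illustrative use (a sketch, not code from this file): the rtx cost hooks
   typically index the per-mode arrays of the cost tables below with this
   macro, along the lines of
     cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit
   so a DImode multiply reads slot 3 and any wider mode falls back to the
   "other" slot 4.  */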
135 /* Processor costs (relative to an add) */
136 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
137 #define COSTS_N_BYTES(N) ((N) * 2)
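/* Worked example (assuming COSTS_N_INSNS (N) is (N) * 4, as the comment
   above states): COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a two-byte
   add in the size table below scores the same as a one-insn add does in the
   speed-tuned tables, keeping the two kinds of cost tables comparable.  */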
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
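/* Layout note (a sketch based on the stringop_algs definition in i386.h):
   each memcpy/memset table below has two entries, one for 32-bit and one for
   64-bit code.  An entry names the algorithm used when the block size is not
   known at compile time, followed by {max_size, algorithm, noalign} triples
   that are tried in order, with max_size == -1 covering all larger sizes.  */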
141 static stringop_algs ix86_size_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
144 static stringop_algs ix86_size_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
148 const
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
168 2, /* MOVE_RATIO */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
194 2, /* Branch cost */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
201 ix86_size_memcpy,
202 ix86_size_memset,
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
214 };
216 /* Processor costs (relative to an add) */
217 static stringop_algs i386_memcpy[2] = {
218 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
219 DUMMY_STRINGOP_ALGS};
220 static stringop_algs i386_memset[2] = {
221 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
222 DUMMY_STRINGOP_ALGS};
224 static const
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
244 3, /* MOVE_RATIO */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
270 1, /* Branch cost */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
277 i386_memcpy,
278 i386_memset,
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
292 static stringop_algs i486_memcpy[2] = {
293 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
294 DUMMY_STRINGOP_ALGS};
295 static stringop_algs i486_memset[2] = {
296 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
297 DUMMY_STRINGOP_ALGS};
299 static const
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
319 3, /* MOVE_RATIO */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
347 1, /* Branch cost */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
354 i486_memcpy,
355 i486_memset,
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
367 };
369 static stringop_algs pentium_memcpy[2] = {
370 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
371 DUMMY_STRINGOP_ALGS};
372 static stringop_algs pentium_memset[2] = {
373 {libcall, {{-1, rep_prefix_4_byte, false}}},
374 DUMMY_STRINGOP_ALGS};
376 static const
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
396 6, /* MOVE_RATIO */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
422 2, /* Branch cost */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
429 pentium_memcpy,
430 pentium_memset,
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
442 };
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks an inline loop is still a
446 noticeable win; for bigger blocks either rep movsl or rep movsb is the
447 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
448 but after 4K the difference is down in the noise. */
449 static stringop_algs pentiumpro_memcpy[2] = {
450 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
451 {8192, rep_prefix_4_byte, false},
452 {-1, rep_prefix_1_byte, false}}},
453 DUMMY_STRINGOP_ALGS};
454 static stringop_algs pentiumpro_memset[2] = {
455 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
456 {8192, rep_prefix_4_byte, false},
457 {-1, libcall, false}}},
458 DUMMY_STRINGOP_ALGS};
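/* Reading pentiumpro_memcpy above (an illustration of the table, not a
   policy change): for known sizes, blocks up to 128 bytes use an inline
   loop, up to 1024 bytes an unrolled loop, up to 8192 bytes rep movsl, and
   anything larger rep movsb; when the size is unknown at compile time,
   rep movsl is used directly.  */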
459 static const
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
479 6, /* MOVE_RATIO */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
505 2, /* Branch cost */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
512 pentiumpro_memcpy,
513 pentiumpro_memset,
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
525 };
527 static stringop_algs geode_memcpy[2] = {
528 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
529 DUMMY_STRINGOP_ALGS};
530 static stringop_algs geode_memset[2] = {
531 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
532 DUMMY_STRINGOP_ALGS};
533 static const
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
553 4, /* MOVE_RATIO */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
580 1, /* Branch cost */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
587 geode_memcpy,
588 geode_memset,
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
600 };
602 static stringop_algs k6_memcpy[2] = {
603 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
604 DUMMY_STRINGOP_ALGS};
605 static stringop_algs k6_memset[2] = {
606 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
607 DUMMY_STRINGOP_ALGS};
608 static const
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
628 4, /* MOVE_RATIO */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
657 1, /* Branch cost */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
664 k6_memcpy,
665 k6_memset,
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
677 };
679 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
680 compared to K8. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
682 static stringop_algs athlon_memcpy[2] = {
683 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
684 DUMMY_STRINGOP_ALGS};
685 static stringop_algs athlon_memset[2] = {
686 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
687 DUMMY_STRINGOP_ALGS};
688 static const
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
708 9, /* MOVE_RATIO */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
734 5, /* Branch cost */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
741 athlon_memcpy,
742 athlon_memset,
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
754 };
756 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
757 small blocks it is better to use a loop. For large blocks, a libcall can
758 do non-temporal accesses and beat inline code considerably. */
759 static stringop_algs k8_memcpy[2] = {
760 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
761 {-1, rep_prefix_4_byte, false}}},
762 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
763 {-1, libcall, false}}}};
764 static stringop_algs k8_memset[2] = {
765 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
766 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
767 {libcall, {{48, unrolled_loop, false},
768 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
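/* Reading the 64-bit entry of k8_memcpy above (illustration only): blocks of
   up to 16 bytes use an inline loop, up to 8192 bytes rep movsq, and larger
   blocks a library call, which matches the comment before these tables; a
   size that is unknown at compile time also goes through the library call.  */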
769 static const
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
789 9, /* MOVE_RATIO */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
818 time). */
819 100, /* number of parallel prefetches */
820 3, /* Branch cost */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
828 k8_memcpy,
829 k8_memset,
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
841 };
843 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
844 very small blocks it is better to use a loop. For large blocks, a libcall can
845 do non-temporal accesses and beat inline code considerably. */
846 static stringop_algs amdfam10_memcpy[2] = {
847 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
848 {-1, rep_prefix_4_byte, false}}},
849 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
850 {-1, libcall, false}}}};
851 static stringop_algs amdfam10_memset[2] = {
852 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
853 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
854 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
855 {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
875 9, /* MOVE_RATIO */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
897 /* On K8:
898 MOVD reg64, xmmreg Double FSTORE 4
899 MOVD reg32, xmmreg Double FSTORE 4
900 On AMDFAM10:
901 MOVD reg64, xmmreg Double FADD 3
902 1/1 1/1
903 MOVD reg32, xmmreg Double FADD 3
904 1/1 1/1 */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
912 time). */
913 100, /* number of parallel prefetches */
914 2, /* Branch cost */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
922 amdfam10_memcpy,
923 amdfam10_memset,
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
935 };
937 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
938 very small blocks it is better to use a loop. For large blocks, a libcall
939 can do non-temporal accesses and beat inline code considerably. */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
970 9, /* MOVE_RATIO */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
992 /* On K8:
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
995 On AMDFAM10:
996 MOVD reg64, xmmreg Double FADD 3
997 1/1 1/1
998 MOVD reg32, xmmreg Double FADD 3
999 1/1 1/1 */
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1007 time). */
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1017 bdver1_memcpy,
1018 bdver1_memset,
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 4, /* cond_taken_branch_cost. */
1029 2, /* cond_not_taken_branch_cost. */
1030 };
1032 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1033 very small blocks it is better to use a loop. For large blocks, a libcall
1034 can do non-temporal accesses and beat inline code considerably. */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1066 9, /* MOVE_RATIO */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1088 /* On K8:
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1091 On AMDFAM10:
1092 MOVD reg64, xmmreg Double FADD 3
1093 1/1 1/1
1094 MOVD reg32, xmmreg Double FADD 3
1095 1/1 1/1 */
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1103 time). */
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1113 bdver2_memcpy,
1114 bdver2_memset,
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 4, /* cond_taken_branch_cost. */
1125 2, /* cond_not_taken_branch_cost. */
1126 };
1129 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1130 very small blocks it is better to use a loop. For large blocks, a libcall
1131 can do non-temporal accesses and beat inline code considerably. */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1161 9, /* MOVE_RATIO */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1190 time). */
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1200 bdver3_memcpy,
1201 bdver3_memset,
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 4, /* cond_taken_branch_cost. */
1212 2, /* cond_not_taken_branch_cost. */
1213 };
1215 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1216 very small blocks it is better to use a loop. For large blocks, a libcall
1217 can do non-temporal accesses and beat inline code considerably. */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1247 9, /* MOVE_RATIO */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1286 bdver4_memcpy,
1287 bdver4_memset,
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 4, /* cond_taken_branch_cost. */
1298 2, /* cond_not_taken_branch_cost. */
1299 };
1301 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1302 very small blocks it is better to use a loop. For large blocks, a libcall can
1303 do non-temporal accesses and beat inline code considerably. */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1333 9, /* MOVE_RATIO */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1355 /* On K8:
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1358 On AMDFAM10:
1359 MOVD reg64, xmmreg Double FADD 3
1360 1/1 1/1
1361 MOVD reg32, xmmreg Double FADD 3
1362 1/1 1/1 */
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1375 btver1_memcpy,
1376 btver1_memset,
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
1388 };
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1419 9, /* MOVE_RATIO */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1441 /* On K8:
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1444 On AMDFAM10:
1445 MOVD reg64, xmmreg Double FADD 3
1446 1/1 1/1
1447 MOVD reg32, xmmreg Double FADD 3
1448 1/1 1/1 */
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1460 btver2_memcpy,
1461 btver2_memset,
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1483 static const
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1503 6, /* MOVE_RATIO */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1536 pentium4_memcpy,
1537 pentium4_memset,
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562 static const
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1615 nocona_memcpy,
1616 nocona_memset,
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1639 static const
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1692 atom_memcpy,
1693 atom_memset,
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1716 static const
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1769 slm_memcpy,
1770 slm_memset,
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1793 static const
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1846 intel_memcpy,
1847 intel_memset,
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
1874 static const
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration, lea is 2 cycles and more.  With
1878 this cost, however, our current implementation of synth_mult results in
1879 the use of unnecessary temporary registers, causing regressions on several
1880 SPECfp benchmarks.  */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1925 value is increased to the perhaps more appropriate value of 5.  */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1933 generic_memcpy,
1934 generic_memset,
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1955 {24, loop, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
1961 static const
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration, lea is 2 cycles and more.  With
1965 this cost, however, our current implementation of synth_mult results in
1966 the use of unnecessary temporary registers, causing regressions on several
1967 SPECfp benchmarks.  */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME: perhaps a more appropriate value is 5.  */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2019 core_memcpy,
2020 core_memset,
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
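/* Sketch of how the selected table is consulted elsewhere in this
   file (see e.g. ix86_parse_stringop_strategy_string below):

     const struct stringop_algs *algs
       = &ix86_cost->memcpy[TARGET_64BIT != 0];

   so changing -mtune (or -Os) only has to repoint ix86_tune_cost and
   ix86_cost at a different processor_costs table.  */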
2041 /* Processor feature/optimization bitmasks. */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
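/* Illustration only (the real entries live in x86-tune.def): a tuning
   selector combines the masks above, so a hypothetical

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_BTVER | m_GENERIC)

   would enable that feature when tuning for any Core, btver1/btver2 or
   generic target, since initial_ix86_tune_features below is built
   directly from these selector expressions.  */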
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2079 #undef DEF_TUNE
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2082 #undef DEF_TUNE
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2091 #undef DEF_TUNE
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2094 #undef DEF_TUNE
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2107 ~m_386,
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2110 ~(m_386 | m_486),
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2113 ~m_386,
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2116 ~m_386,
2119 /* In case the average insn count for a single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2121 epilogue code.  */
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2138 /* FP registers */
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2141 /* arg pointer */
2142 NON_Q_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2145 /* SSE registers */
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2147 SSE_REGS, SSE_REGS,
2148 /* MMX registers */
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2150 MMX_REGS, MMX_REGS,
2151 /* REX registers */
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2156 SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2247 numbers.
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293 SI_REG, DI_REG,
2294 XMM6_REG, XMM7_REG,
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2303 unsigned short n;
2304 rtx rtl;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2311 [arguments]
2312 <- ARG_POINTER
2313 saved pc
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2319 [saved regs]
2320 <- regs_save_offset
2321 [padding0]
2323 [saved SSE regs]
2324 <- sse_regs_save_offset
2325 [padding1] |
2326 | <- FRAME_POINTER
2327 [va_arg registers] |
2329 [frame] |
2331 [padding2] | = to_allocate
2332 <- STACK_POINTER
2334 struct ix86_frame
2336 int nsseregs;
2337 int nregs;
2338 int va_arg_size;
2339 int red_zone_size;
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits, as specified on
2388 the command line. */
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2406 tree x86_mfence;
2408 /* Register class used for passing a given 64-bit part of the argument.
2409 These represent classes as documented by the psABI, with the exception
2410 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2411 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (the upper half then contains padding). */
2415 enum x86_64_reg_class
2417 X86_64_NO_CLASS,
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2420 X86_64_SSE_CLASS,
2421 X86_64_SSESF_CLASS,
2422 X86_64_SSEDF_CLASS,
2423 X86_64_SSEUP_CLASS,
2424 X86_64_X87_CLASS,
2425 X86_64_X87UP_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2427 X86_64_MEMORY_CLASS
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2441 const_tree);
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2446 rtx, rtx, int);
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2483 #endif
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2496 struct ptt
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
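/* For example, the btver2 row below supplies &btver2_cost together with
   align_loop/align_loop_max_skip = 16/10, align_jump/align_jump_max_skip
   = 16/7 and align_func = 11, following the field order of struct ptt
   above.  */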
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538 static unsigned int
2539 rest_of_handle_insert_vzeroupper (void)
2541 int i;
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256-bit registers.  The pass
2545 reuses the mode switching infrastructure by re-running the mode
2546 insertion pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
2554 return 0;
2557 namespace {
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2574 public:
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2584 && !optimize_size;
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
2594 } // anon namespace
2596 rtl_opt_pass *
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
2604 static inline bool
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
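/* Rough illustration only (placeholder CPU names; the exact set and
   order of options follows the tables inside the function): with -m64,
   -march/-mtune given and SSE4.2 enabled, the returned string could
   look like

     "-march=foo -mtune=foo -m64 -msse4.2 ... -mfpmath=sse"

   with a backslash-newline break inserted whenever ADD_NL_P is true
   and a line would exceed roughly 70 characters.  */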
2613 static char *
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2616 bool add_nl_p)
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options are matched first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2682 /* Flag options. */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char isa_other[40];
2718 char target_other[40];
2719 unsigned num = 0;
2720 unsigned i, j;
2721 char *ret;
2722 char *ptr;
2723 size_t len;
2724 size_t line_len;
2725 size_t sep_len;
2726 const char *abi;
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2731 if (arch)
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2738 if (tune)
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
2748 abi = "-m64";
2749 else
2750 abi = "-mx32";
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2755 else
2756 abi = "-m32";
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2773 isa);
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2793 if (fpmath)
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2798 case FPMATH_387:
2799 opts[num++][1] = "387";
2800 break;
2802 case FPMATH_SSE:
2803 opts[num++][1] = "sse";
2804 break;
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2808 break;
2810 default:
2811 gcc_unreachable ();
2815 /* Any options? */
2816 if (num == 0)
2817 return NULL;
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
2822 len = 0;
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2826 len += sep_len;
2827 for (j = 0; j < 2; j++)
2828 if (opts[i][j])
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2834 line_len = 0;
2836 for (i = 0; i < num; i++)
2838 size_t len2[2];
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2843 if (i != 0)
2845 *ptr++ = ' ';
2846 line_len++;
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2850 *ptr++ = '\\';
2851 *ptr++ = '\n';
2852 line_len = 0;
2856 for (j = 0; j < 2; j++)
2857 if (opts[i][j])
2859 memcpy (ptr, opts[i][j], len2[j]);
2860 ptr += len2[j];
2861 line_len += len2[j];
2865 *ptr = '\0';
2866 gcc_assert (ret + len >= ptr);
2868 return ret;
2871 /* Return true if profiling code should be emitted before the
2872 prologue, and false otherwise.
2873 Note: for x86 with "hotfix" this is unfortunately not handled. */
2874 static bool
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
2881 options. */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2887 ix86_fpmath, true);
2889 if (opts)
2891 fprintf (stderr, "%s\n\n", opts);
2892 free (opts);
2894 else
2895 fputs ("<no options>\n\n", stderr);
2897 return;
2900 static const char *stringop_alg_names[] = {
2901 #define DEF_ENUM
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2904 #undef DEF_ENUM
2905 #undef DEF_ALG
2908 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string is of the following form (or a comma-separated list of such entries):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2917 Examples:
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
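/* Illustration only (algorithm names as spelled in stringop.def): the
   memset example above is recorded as stringop_size_range entries
   roughly equivalent to

     {16,   rep_prefix_8_byte, true}     -- sizes [1, 16], noalign
     {2048, vector_loop,       false}    -- sizes [17, 2048], align
     {-1,   libcall,           true}     -- sizes > 2048, noalign

   which are then copied over the default algs table for the current
   tuning, see ix86_parse_stringop_strategy_string below.  */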
2933 struct stringop_size_range
2935 int max;
2936 stringop_alg alg;
2937 bool noalign;
2940 static void
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
2946 int i = 0, n = 0;
2948 if (is_memset)
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2950 else
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
2957 int maxs;
2958 char alg_name[128];
2959 char align[16];
2960 next_range_str = strchr (curr_range_str, ',');
2961 if (next_range_str)
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2969 return;
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2976 return;
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2981 break;
2983 if (i == last_alg)
2985 error ("wrong stringop strategy name %s specified for option %s",
2986 alg_name,
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2988 return;
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2997 else
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3001 return;
3003 n++;
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3011 " for option %s",
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3013 return;
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3020 return;
3023 /* Now override the default algs array. */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
3035 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3036 print the features that are explicitly set.  */
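/* A minimal usage sketch (the feature names here are placeholders; the real
   names come from ix86_tune_feature_names):
     -mtune-ctrl=feature_a,^feature_b
   sets feature_a and clears feature_b, since a leading '^' inverts the
   setting of the comma-separated entry that follows it.  */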
3038 static void
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3042 return;
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
3047 int i;
3050 bool clear = false;
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3058 clear = true;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3065 if (dump)
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3068 break;
3071 if (i == X86_TUNE_LAST)
3072 error ("Unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string);
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3077 free (orig);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3081 processor type. */
3083 static void
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3087 int i;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3093 else
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3097 if (dump)
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
3105 parse_mtune_ctrl_str (dump);
3109 /* Override various settings based on options. If MAIN_ARGS_P, the
3110 options are from the command line, otherwise they are from
3111 attributes. */
3113 static void
3114 ix86_option_override_internal (bool main_args_p,
3115 struct gcc_options *opts,
3116 struct gcc_options *opts_set)
3118 int i;
3119 unsigned int ix86_arch_mask;
3120 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3121 const char *prefix;
3122 const char *suffix;
3123 const char *sw;
3125 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3126 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3127 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3128 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3129 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3130 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3131 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3132 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3133 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3134 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3135 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3136 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3137 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3138 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3139 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3140 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3141 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3142 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3143 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3144 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3145 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3146 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3147 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3148 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3149 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3150 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3151 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3152 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3153 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3154 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3155 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3156 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3157 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3158 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3159 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3160 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3161 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3162 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3163 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3164 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3165 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3166 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3167 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3168 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3169 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3170 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3171 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3172 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3173 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3174 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3175 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3176 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3177 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3178 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3179 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3180 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3181 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3183 #define PTA_CORE2 \
3184 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3185 | PTA_CX16 | PTA_FXSR)
3186 #define PTA_NEHALEM \
3187 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3188 #define PTA_WESTMERE \
3189 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3190 #define PTA_SANDYBRIDGE \
3191 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3192 #define PTA_IVYBRIDGE \
3193 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3194 #define PTA_HASWELL \
3195 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3196 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3197 #define PTA_BROADWELL \
3198 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3199 #define PTA_KNL \
3200 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3201 #define PTA_BONNELL \
3202 (PTA_CORE2 | PTA_MOVBE)
3203 #define PTA_SILVERMONT \
3204 (PTA_WESTMERE | PTA_MOVBE)
3206 /* If this reaches 64, we need to widen the struct pta flags field below.  */
3208 static struct pta
3210 const char *const name; /* processor name or nickname. */
3211 const enum processor_type processor;
3212 const enum attr_cpu schedule;
3213 const unsigned HOST_WIDE_INT flags;
3215 const processor_alias_table[] =
3217 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3218 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3219 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3221 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3222 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3223 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3225 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE | PTA_FXSR},
3227 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3229 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3230 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3231 PTA_MMX | PTA_SSE | PTA_FXSR},
3232 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3233 PTA_MMX | PTA_SSE | PTA_FXSR},
3234 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3235 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3236 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3237 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3238 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3239 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3240 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3241 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3242 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3245 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3246 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3248 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3249 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3250 PTA_SANDYBRIDGE},
3251 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3252 PTA_SANDYBRIDGE},
3253 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3254 PTA_IVYBRIDGE},
3255 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3256 PTA_IVYBRIDGE},
3257 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3259 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3260 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3262 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3264 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3265 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3266 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3267 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3268 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3269 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3271 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3272 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3273 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3274 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3275 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3276 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3278 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3280 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3281 {"x86-64", PROCESSOR_K8, CPU_K8,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3283 {"k8", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3286 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3289 {"opteron", PROCESSOR_K8, CPU_K8,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3292 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3295 {"athlon64", PROCESSOR_K8, CPU_K8,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3298 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3306 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3307 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3309 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3310 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3312 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3313 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3314 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3315 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3318 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3319 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3320 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3321 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3323 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3324 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3325 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3326 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3327 | PTA_XSAVEOPT | PTA_FSGSBASE},
3328 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3329 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3330 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3331 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3332 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3333 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3334 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3335 | PTA_MOVBE},
3336 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3339 | PTA_FXSR | PTA_XSAVE},
3340 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3341 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3342 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3343 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3344 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3345 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3347 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3348 PTA_64BIT
3349 | PTA_HLE /* flags are only used for -march switch. */ },
3352 /* -mrecip options. */
3353 static struct
3355 const char *string; /* option name */
3356 unsigned int mask; /* mask bits to set */
3358 const recip_options[] =
3360 { "all", RECIP_MASK_ALL },
3361 { "none", RECIP_MASK_NONE },
3362 { "div", RECIP_MASK_DIV },
3363 { "sqrt", RECIP_MASK_SQRT },
3364 { "vec-div", RECIP_MASK_VEC_DIV },
3365 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3368 int const pta_size = ARRAY_SIZE (processor_alias_table);
3370 /* Set up prefix/suffix so the error messages refer to either the command
3371 line argument, or the attribute(target). */
3372 if (main_args_p)
3374 prefix = "-m";
3375 suffix = "";
3376 sw = "switch";
3378 else
3380 prefix = "option(\"";
3381 suffix = "\")";
3382 sw = "attribute";
3385 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3386 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3387 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3389 #ifdef TARGET_BI_ARCH
3390 else
3392 #if TARGET_BI_ARCH == 1
3393 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3394 is on and OPTION_MASK_ABI_X32 is off. We turn off
3395 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3396 -mx32. */
3397 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3399 #else
3400 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3401 on and OPTION_MASK_ABI_64 is off. We turn off
3402 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3403 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3404 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3405 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3407 #endif
3409 #endif
3411 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3414 OPTION_MASK_ABI_64 for TARGET_X32. */
3415 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3416 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3418 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3420 | OPTION_MASK_ABI_X32
3421 | OPTION_MASK_ABI_64);
3422 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3424 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3425 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3430 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3431 SUBTARGET_OVERRIDE_OPTIONS;
3432 #endif
3434 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3435 SUBSUBTARGET_OVERRIDE_OPTIONS;
3436 #endif
3438 /* -fPIC is the default for 64-bit Mach-O (Darwin) targets. */
3439 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3440 opts->x_flag_pic = 2;
3442 /* Need to check -mtune=generic first. */
3443 if (opts->x_ix86_tune_string)
3445 /* As special support for cross compilers we read -mtune=native
3446 as -mtune=generic. With native compilers we won't see the
3447 -mtune=native, as it was changed by the driver. */
3448 if (!strcmp (opts->x_ix86_tune_string, "native"))
3450 opts->x_ix86_tune_string = "generic";
3452 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3453 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3454 "%stune=k8%s or %stune=generic%s instead as appropriate",
3455 prefix, suffix, prefix, suffix, prefix, suffix);
3457 else
3459 if (opts->x_ix86_arch_string)
3460 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3461 if (!opts->x_ix86_tune_string)
3463 opts->x_ix86_tune_string
3464 = processor_target_table[TARGET_CPU_DEFAULT].name;
3465 ix86_tune_defaulted = 1;
3468 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3469 or defaulted. We need to use a sensible tune option. */
3470 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3472 opts->x_ix86_tune_string = "generic";
3476 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3477 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3479 /* rep; movq isn't available in 32-bit code. */
3480 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3481 opts->x_ix86_stringop_alg = no_stringop;
3484 if (!opts->x_ix86_arch_string)
3485 opts->x_ix86_arch_string
3486 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3487 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3488 else
3489 ix86_arch_specified = 1;
3491 if (opts_set->x_ix86_pmode)
3493 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3494 && opts->x_ix86_pmode == PMODE_SI)
3495 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3496 && opts->x_ix86_pmode == PMODE_DI))
3497 error ("address mode %qs not supported in the %s bit mode",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3499 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3501 else
3502 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3503 ? PMODE_DI : PMODE_SI;
3505 if (!opts_set->x_ix86_abi)
3506 opts->x_ix86_abi = DEFAULT_ABI;
3508 /* For targets using the MS ABI, enable ms-extensions unless they were
3509 explicitly turned off.  For non-MS ABIs we turn this
3510 option off. */
3511 if (!opts_set->x_flag_ms_extensions)
3512 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3514 if (opts_set->x_ix86_cmodel)
3516 switch (opts->x_ix86_cmodel)
3518 case CM_SMALL:
3519 case CM_SMALL_PIC:
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_SMALL_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "small", "32");
3525 break;
3527 case CM_MEDIUM:
3528 case CM_MEDIUM_PIC:
3529 if (opts->x_flag_pic)
3530 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3531 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3532 error ("code model %qs not supported in the %s bit mode",
3533 "medium", "32");
3534 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3535 error ("code model %qs not supported in x32 mode",
3536 "medium");
3537 break;
3539 case CM_LARGE:
3540 case CM_LARGE_PIC:
3541 if (opts->x_flag_pic)
3542 opts->x_ix86_cmodel = CM_LARGE_PIC;
3543 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3544 error ("code model %qs not supported in the %s bit mode",
3545 "large", "32");
3546 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in x32 mode",
3548 "large");
3549 break;
3551 case CM_32:
3552 if (opts->x_flag_pic)
3553 error ("code model %s does not support PIC mode", "32");
3554 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3556 "32", "64");
3557 break;
3559 case CM_KERNEL:
3560 if (opts->x_flag_pic)
3562 error ("code model %s does not support PIC mode", "kernel");
3563 opts->x_ix86_cmodel = CM_32;
3565 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3566 error ("code model %qs not supported in the %s bit mode",
3567 "kernel", "32");
3568 break;
3570 default:
3571 gcc_unreachable ();
3574 else
3576 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3577 use of rip-relative addressing. This eliminates fixups that
3578 would otherwise be needed if this object is to be placed in a
3579 DLL, and is essentially just as efficient as direct addressing. */
3580 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 && (TARGET_RDOS || TARGET_PECOFF))
3582 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3583 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3585 else
3586 opts->x_ix86_cmodel = CM_32;
3588 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3590 error ("-masm=intel not supported in this configuration");
3591 opts->x_ix86_asm_dialect = ASM_ATT;
3593 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3594 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3595 sorry ("%i-bit mode not compiled in",
3596 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
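/* Look up -march in the alias table; for every ISA the selected CPU implies
   (its PTA_* bits), turn on the corresponding OPTION_MASK_ISA_* flag unless
   the user already set or cleared that ISA explicitly.  */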
3598 for (i = 0; i < pta_size; i++)
3599 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3601 ix86_schedule = processor_alias_table[i].schedule;
3602 ix86_arch = processor_alias_table[i].processor;
3603 /* Default cpu tuning to the architecture. */
3604 ix86_tune = ix86_arch;
3606 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3607 && !(processor_alias_table[i].flags & PTA_64BIT))
3608 error ("CPU you selected does not support x86-64 "
3609 "instruction set");
3611 if (processor_alias_table[i].flags & PTA_MMX
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3614 if (processor_alias_table[i].flags & PTA_3DNOW
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3617 if (processor_alias_table[i].flags & PTA_3DNOW_A
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3620 if (processor_alias_table[i].flags & PTA_SSE
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3623 if (processor_alias_table[i].flags & PTA_SSE2
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3626 if (processor_alias_table[i].flags & PTA_SSE3
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3629 if (processor_alias_table[i].flags & PTA_SSSE3
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3632 if (processor_alias_table[i].flags & PTA_SSE4_1
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3635 if (processor_alias_table[i].flags & PTA_SSE4_2
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3638 if (processor_alias_table[i].flags & PTA_AVX
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3641 if (processor_alias_table[i].flags & PTA_AVX2
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3644 if (processor_alias_table[i].flags & PTA_FMA
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3647 if (processor_alias_table[i].flags & PTA_SSE4A
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3650 if (processor_alias_table[i].flags & PTA_FMA4
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3653 if (processor_alias_table[i].flags & PTA_XOP
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3656 if (processor_alias_table[i].flags & PTA_LWP
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3659 if (processor_alias_table[i].flags & PTA_ABM
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3662 if (processor_alias_table[i].flags & PTA_BMI
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3665 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3668 if (processor_alias_table[i].flags & PTA_TBM
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3671 if (processor_alias_table[i].flags & PTA_BMI2
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3674 if (processor_alias_table[i].flags & PTA_CX16
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3677 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3680 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3681 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3684 if (processor_alias_table[i].flags & PTA_MOVBE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3687 if (processor_alias_table[i].flags & PTA_AES
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3690 if (processor_alias_table[i].flags & PTA_SHA
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3693 if (processor_alias_table[i].flags & PTA_PCLMUL
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3696 if (processor_alias_table[i].flags & PTA_FSGSBASE
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3699 if (processor_alias_table[i].flags & PTA_RDRND
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3702 if (processor_alias_table[i].flags & PTA_F16C
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3705 if (processor_alias_table[i].flags & PTA_RTM
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3708 if (processor_alias_table[i].flags & PTA_HLE
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3711 if (processor_alias_table[i].flags & PTA_PRFCHW
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3714 if (processor_alias_table[i].flags & PTA_RDSEED
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3717 if (processor_alias_table[i].flags & PTA_ADX
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3720 if (processor_alias_table[i].flags & PTA_FXSR
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3723 if (processor_alias_table[i].flags & PTA_XSAVE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3726 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3729 if (processor_alias_table[i].flags & PTA_AVX512F
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3732 if (processor_alias_table[i].flags & PTA_AVX512ER
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3735 if (processor_alias_table[i].flags & PTA_AVX512PF
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3738 if (processor_alias_table[i].flags & PTA_AVX512CD
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3741 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3744 if (processor_alias_table[i].flags & PTA_PCOMMIT
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3747 if (processor_alias_table[i].flags & PTA_CLWB
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3750 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3753 if (processor_alias_table[i].flags & PTA_XSAVEC
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3756 if (processor_alias_table[i].flags & PTA_XSAVES
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3759 if (processor_alias_table[i].flags & PTA_AVX512DQ
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3762 if (processor_alias_table[i].flags & PTA_AVX512BW
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3765 if (processor_alias_table[i].flags & PTA_AVX512VL
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3768 if (processor_alias_table[i].flags & PTA_MPX
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3771 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3774 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3777 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3778 x86_prefetch_sse = true;
3780 break;
3783 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3784 error ("Intel MPX does not support x32");
3789 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3790 error ("generic CPU can be used only for %stune=%s %s",
3791 prefix, suffix, sw);
3792 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3793 error ("intel CPU can be used only for %stune=%s %s",
3794 prefix, suffix, sw);
3795 else if (i == pta_size)
3796 error ("bad value (%s) for %sarch=%s %s",
3797 opts->x_ix86_arch_string, prefix, suffix, sw);
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 for (i = 0; i < pta_size; i++)
3804 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3806 ix86_schedule = processor_alias_table[i].schedule;
3807 ix86_tune = processor_alias_table[i].processor;
3808 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 if (!(processor_alias_table[i].flags & PTA_64BIT))
3812 if (ix86_tune_defaulted)
3814 opts->x_ix86_tune_string = "x86-64";
3815 for (i = 0; i < pta_size; i++)
3816 if (! strcmp (opts->x_ix86_tune_string,
3817 processor_alias_table[i].name))
3818 break;
3819 ix86_schedule = processor_alias_table[i].schedule;
3820 ix86_tune = processor_alias_table[i].processor;
3822 else
3823 error ("CPU you selected does not support x86-64 "
3824 "instruction set");
3827 /* Intel CPUs have always interpreted SSE prefetch instructions as
3828 NOPs; so, we can enable SSE prefetch instructions even when
3829 -mtune (rather than -march) points us to a processor that has them.
3830 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3831 higher processors. */
3832 if (TARGET_CMOV
3833 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3834 x86_prefetch_sse = true;
3835 break;
3838 if (ix86_tune_specified && i == pta_size)
3839 error ("bad value (%s) for %stune=%s %s",
3840 opts->x_ix86_tune_string, prefix, suffix, sw);
3842 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3844 #ifndef USE_IX86_FRAME_POINTER
3845 #define USE_IX86_FRAME_POINTER 0
3846 #endif
3848 #ifndef USE_X86_64_FRAME_POINTER
3849 #define USE_X86_64_FRAME_POINTER 0
3850 #endif
3852 /* Set the default values for switches whose default depends on TARGET_64BIT
3853 in case they weren't overwritten by command line options. */
3854 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3856 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3857 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3858 if (opts->x_flag_asynchronous_unwind_tables
3859 && !opts_set->x_flag_unwind_tables
3860 && TARGET_64BIT_MS_ABI)
3861 opts->x_flag_unwind_tables = 1;
3862 if (opts->x_flag_asynchronous_unwind_tables == 2)
3863 opts->x_flag_unwind_tables
3864 = opts->x_flag_asynchronous_unwind_tables = 1;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = 0;
3868 else
3870 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3871 opts->x_flag_omit_frame_pointer
3872 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3873 if (opts->x_flag_asynchronous_unwind_tables == 2)
3874 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3875 if (opts->x_flag_pcc_struct_return == 2)
3876 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3879 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3880 /* TODO: ix86_cost should be chosen at instruction or function granularity
3881 so for cold code we use size_cost even in !optimize_size compilation. */
3882 if (opts->x_optimize_size)
3883 ix86_cost = &ix86_size_cost;
3884 else
3885 ix86_cost = ix86_tune_cost;
3887 /* Arrange to set up i386_stack_locals for all functions. */
3888 init_machine_status = ix86_init_machine_status;
3890 /* Validate -mregparm= value. */
3891 if (opts_set->x_ix86_regparm)
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 warning (0, "-mregparm is ignored in 64-bit mode");
3895 if (opts->x_ix86_regparm > REGPARM_MAX)
3897 error ("-mregparm=%d is not between 0 and %d",
3898 opts->x_ix86_regparm, REGPARM_MAX);
3899 opts->x_ix86_regparm = 0;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_regparm = REGPARM_MAX;
3905 /* Default align_* from the processor table. */
3906 if (opts->x_align_loops == 0)
3908 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3909 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3911 if (opts->x_align_jumps == 0)
3913 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3914 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3916 if (opts->x_align_functions == 0)
3918 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3921 /* Provide default for -mbranch-cost= value. */
3922 if (!opts_set->x_ix86_branch_cost)
3923 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3927 opts->x_target_flags
3928 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3930 /* Enable by default the SSE and MMX builtins. Do allow the user to
3931 explicitly disable any of these. In particular, disabling SSE and
3932 MMX for kernel code is extremely useful. */
3933 if (!ix86_arch_specified)
3934 opts->x_ix86_isa_flags
3935 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3936 | TARGET_SUBTARGET64_ISA_DEFAULT)
3937 & ~opts->x_ix86_isa_flags_explicit);
3939 if (TARGET_RTD_P (opts->x_target_flags))
3940 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3942 else
3944 opts->x_target_flags
3945 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3947 if (!ix86_arch_specified)
3948 opts->x_ix86_isa_flags
3949 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3951 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
3952 when the programmer takes care to keep the stack from being destroyed. */
3953 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3954 opts->x_target_flags |= MASK_NO_RED_ZONE;
3957 /* Keep nonleaf frame pointers. */
3958 if (opts->x_flag_omit_frame_pointer)
3959 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3960 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3961 opts->x_flag_omit_frame_pointer = 1;
3963 /* If we're doing fast math, we don't care about comparison order
3964 wrt NaNs. This lets us use a shorter comparison sequence. */
3965 if (opts->x_flag_finite_math_only)
3966 opts->x_target_flags &= ~MASK_IEEE_FP;
3968 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3969 since the insns won't need emulation. */
3970 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3971 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3973 /* Likewise, if the target doesn't have a 387, or we've specified
3974 software floating point, don't use 387 inline intrinsics. */
3975 if (!TARGET_80387_P (opts->x_target_flags))
3976 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3978 /* Turn on MMX builtins for -msse. */
3979 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 opts->x_ix86_isa_flags
3981 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3983 /* Enable SSE prefetch. */
3984 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3985 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3986 x86_prefetch_sse = true;
3988 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3989 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3990 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_isa_flags
3992 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3994 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3995 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3996 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_isa_flags
3998 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4000 /* Enable lzcnt instruction for -mabm. */
4001 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4002 opts->x_ix86_isa_flags
4003 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4005 /* Validate -mpreferred-stack-boundary= value or default it to
4006 PREFERRED_STACK_BOUNDARY_DEFAULT. */
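/* The option argument is the log2 of the boundary in bytes; e.g. a value of
   4 yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack
   alignment.  */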
4007 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4008 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4010 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4011 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4012 int max = (TARGET_SEH ? 4 : 12);
4014 if (opts->x_ix86_preferred_stack_boundary_arg < min
4015 || opts->x_ix86_preferred_stack_boundary_arg > max)
4017 if (min == max)
4018 error ("-mpreferred-stack-boundary is not supported "
4019 "for this target");
4020 else
4021 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4022 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4024 else
4025 ix86_preferred_stack_boundary
4026 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4029 /* Set the default value for -mstackrealign. */
4030 if (opts->x_ix86_force_align_arg_pointer == -1)
4031 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4033 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4035 /* Validate -mincoming-stack-boundary= value or default it to
4036 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4037 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4038 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4040 if (opts->x_ix86_incoming_stack_boundary_arg
4041 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4042 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4043 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4044 opts->x_ix86_incoming_stack_boundary_arg,
4045 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4046 else
4048 ix86_user_incoming_stack_boundary
4049 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4050 ix86_incoming_stack_boundary
4051 = ix86_user_incoming_stack_boundary;
4055 #ifndef NO_PROFILE_COUNTERS
4056 if (flag_nop_mcount)
4057 error ("-mnop-mcount is not compatible with this target");
4058 #endif
4059 if (flag_nop_mcount && flag_pic)
4060 error ("-mnop-mcount is not implemented for -fPIC");
4062 /* Accept -msseregparm only if at least SSE support is enabled. */
4063 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4064 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4065 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4067 if (opts_set->x_ix86_fpmath)
4069 if (opts->x_ix86_fpmath & FPMATH_SSE)
4071 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4073 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4074 opts->x_ix86_fpmath = FPMATH_387;
4076 else if ((opts->x_ix86_fpmath & FPMATH_387)
4077 && !TARGET_80387_P (opts->x_target_flags))
4079 warning (0, "387 instruction set disabled, using SSE arithmetics");
4080 opts->x_ix86_fpmath = FPMATH_SSE;
4084 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4085 fpmath=387. The second is however default at many targets since the
4086 extra 80bit precision of temporaries is considered to be part of ABI.
4087 Overwrite the default at least for -ffast-math.
4088 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4089 smaller binaries.  It is however not clear if register allocation is
4090 ready for this setting.
4091 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4092 codegen. We may switch to 387 with -ffast-math for size optimized
4093 functions. */
4094 else if (fast_math_flags_set_p (&global_options)
4095 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4097 else
4098 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4100 /* If the i387 is disabled, then do not return values in it. */
4101 if (!TARGET_80387_P (opts->x_target_flags))
4102 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4104 /* Use external vectorized library in vectorizing intrinsics. */
4105 if (opts_set->x_ix86_veclibabi_type)
4106 switch (opts->x_ix86_veclibabi_type)
4108 case ix86_veclibabi_type_svml:
4109 ix86_veclib_handler = ix86_veclibabi_svml;
4110 break;
4112 case ix86_veclibabi_type_acml:
4113 ix86_veclib_handler = ix86_veclibabi_acml;
4114 break;
4116 default:
4117 gcc_unreachable ();
4120 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4121 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4124 /* If stack probes are required, the space used for large function
4125 arguments on the stack must also be probed, so enable
4126 -maccumulate-outgoing-args so this happens in the prologue. */
4127 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4128 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4130 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4131 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4132 "for correctness", prefix, suffix);
4133 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4136 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
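/* For example, if the target's ASM_GENERATE_INTERNAL_LABEL produces
   something like ".LX0" (typical of ELF assemblers -- illustrative only,
   the exact string is target-defined), everything before the 'X' is the
   prefix, so internal_label_prefix becomes ".L" with length 2.  */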
4138 char *p;
4139 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4140 p = strchr (internal_label_prefix, 'X');
4141 internal_label_prefix_len = p - internal_label_prefix;
4142 *p = '\0';
4145 /* When the scheduling description is not available, disable the scheduler
4146 pass so it won't slow down compilation and make x87 code slower. */
4147 if (!TARGET_SCHEDULE)
4148 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4151 ix86_tune_cost->simultaneous_prefetches,
4152 opts->x_param_values,
4153 opts_set->x_param_values);
4154 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4155 ix86_tune_cost->prefetch_block,
4156 opts->x_param_values,
4157 opts_set->x_param_values);
4158 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4159 ix86_tune_cost->l1_cache_size,
4160 opts->x_param_values,
4161 opts_set->x_param_values);
4162 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4163 ix86_tune_cost->l2_cache_size,
4164 opts->x_param_values,
4165 opts_set->x_param_values);
4167 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4168 if (opts->x_flag_prefetch_loop_arrays < 0
4169 && HAVE_prefetch
4170 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4171 && !opts->x_optimize_size
4172 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4173 opts->x_flag_prefetch_loop_arrays = 1;
4175 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4176 can be optimized to ap = __builtin_next_arg (0). */
4177 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4178 targetm.expand_builtin_va_start = NULL;
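/* Pick the DImode or SImode RTL generator functions up front, so that later
   code can emit pointer-sized add/sub/stack-probe/monitor patterns without
   re-checking Pmode at every use.  */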
4180 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4182 ix86_gen_leave = gen_leave_rex64;
4183 if (Pmode == DImode)
4185 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4186 ix86_gen_tls_local_dynamic_base_64
4187 = gen_tls_local_dynamic_base_64_di;
4189 else
4191 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4192 ix86_gen_tls_local_dynamic_base_64
4193 = gen_tls_local_dynamic_base_64_si;
4196 else
4197 ix86_gen_leave = gen_leave;
4199 if (Pmode == DImode)
4201 ix86_gen_add3 = gen_adddi3;
4202 ix86_gen_sub3 = gen_subdi3;
4203 ix86_gen_sub3_carry = gen_subdi3_carry;
4204 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4205 ix86_gen_andsp = gen_anddi3;
4206 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4207 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4208 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4209 ix86_gen_monitor = gen_sse3_monitor_di;
4211 else
4213 ix86_gen_add3 = gen_addsi3;
4214 ix86_gen_sub3 = gen_subsi3;
4215 ix86_gen_sub3_carry = gen_subsi3_carry;
4216 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4217 ix86_gen_andsp = gen_andsi3;
4218 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4219 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4220 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4221 ix86_gen_monitor = gen_sse3_monitor_si;
4224 #ifdef USE_IX86_CLD
4225 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4226 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4227 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4228 #endif
4230 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4232 if (opts->x_flag_fentry > 0)
4233 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4234 "with -fpic");
4235 opts->x_flag_fentry = 0;
4237 else if (TARGET_SEH)
4239 if (opts->x_flag_fentry == 0)
4240 sorry ("-mno-fentry isn%'t compatible with SEH");
4241 opts->x_flag_fentry = 1;
4243 else if (opts->x_flag_fentry < 0)
4245 #if defined(PROFILE_BEFORE_PROLOGUE)
4246 opts->x_flag_fentry = 1;
4247 #else
4248 opts->x_flag_fentry = 0;
4249 #endif
4252 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4253 opts->x_target_flags |= MASK_VZEROUPPER;
4254 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4255 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4256 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4257 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4258 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4259 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4260 /* Enable 128-bit AVX instruction generation
4261 for the auto-vectorizer. */
4262 if (TARGET_AVX128_OPTIMAL
4263 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4264 opts->x_target_flags |= MASK_PREFER_AVX128;
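/* Parse -mrecip=: a comma-separated list of the option names in
   recip_options above, each optionally prefixed with '!' to disable it.
   For instance -mrecip=all,!sqrt enables every reciprocal approximation
   and then clears the scalar square-root one.  */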
4266 if (opts->x_ix86_recip_name)
4268 char *p = ASTRDUP (opts->x_ix86_recip_name);
4269 char *q;
4270 unsigned int mask, i;
4271 bool invert;
4273 while ((q = strtok (p, ",")) != NULL)
4275 p = NULL;
4276 if (*q == '!')
4278 invert = true;
4279 q++;
4281 else
4282 invert = false;
4284 if (!strcmp (q, "default"))
4285 mask = RECIP_MASK_ALL;
4286 else
4288 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4289 if (!strcmp (q, recip_options[i].string))
4291 mask = recip_options[i].mask;
4292 break;
4295 if (i == ARRAY_SIZE (recip_options))
4297 error ("unknown option for -mrecip=%s", q);
4298 invert = false;
4299 mask = RECIP_MASK_NONE;
4303 opts->x_recip_mask_explicit |= mask;
4304 if (invert)
4305 opts->x_recip_mask &= ~mask;
4306 else
4307 opts->x_recip_mask |= mask;
4311 if (TARGET_RECIP_P (opts->x_target_flags))
4312 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4313 else if (opts_set->x_target_flags & MASK_RECIP)
4314 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4316 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4317 for 64-bit Bionic. */
4318 if (TARGET_HAS_BIONIC
4319 && !(opts_set->x_target_flags
4320 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4321 opts->x_target_flags |= (TARGET_64BIT
4322 ? MASK_LONG_DOUBLE_128
4323 : MASK_LONG_DOUBLE_64);
4325 /* Only one of them can be active. */
4326 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4327 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4329 /* Save the initial options in case the user does function specific
4330 options. */
4331 if (main_args_p)
4332 target_option_default_node = target_option_current_node
4333 = build_target_option_node (opts);
4335 /* Handle stack protector */
4336 if (!opts_set->x_ix86_stack_protector_guard)
4337 opts->x_ix86_stack_protector_guard
4338 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4340 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4341 if (opts->x_ix86_tune_memcpy_strategy)
4343 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4344 ix86_parse_stringop_strategy_string (str, false);
4345 free (str);
4348 if (opts->x_ix86_tune_memset_strategy)
4350 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4351 ix86_parse_stringop_strategy_string (str, true);
4352 free (str);
4356 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4358 static void
4359 ix86_option_override (void)
4361 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4362 struct register_pass_info insert_vzeroupper_info
4363 = { pass_insert_vzeroupper, "reload",
4364 1, PASS_POS_INSERT_AFTER
4367 ix86_option_override_internal (true, &global_options, &global_options_set);
4370 /* This needs to be done at start up. It's convenient to do it here. */
4371 register_pass (&insert_vzeroupper_info);
4374 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4375 static char *
4376 ix86_offload_options (void)
4378 if (TARGET_LP64)
4379 return xstrdup ("-foffload-abi=lp64");
4380 return xstrdup ("-foffload-abi=ilp32");
4383 /* Update register usage after having seen the compiler flags. */
4385 static void
4386 ix86_conditional_register_usage (void)
4388 int i, c_mask;
4390 /* For 32-bit targets, squash the REX registers. */
4391 if (! TARGET_64BIT)
4393 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4394 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4395 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4396 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4397 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4398 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4401 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4402 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4403 : TARGET_64BIT ? (1 << 2)
4404 : (1 << 1));
4406 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4408 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4410 /* Set/reset conditionally defined registers from
4411 CALL_USED_REGISTERS initializer. */
4412 if (call_used_regs[i] > 1)
4413 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4415 /* Calculate registers of CLOBBERED_REGS register set
4416 as call used registers from GENERAL_REGS register set. */
4417 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4418 && call_used_regs[i])
4419 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4422 /* If MMX is disabled, squash the registers. */
4423 if (! TARGET_MMX)
4424 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4425 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4426 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4428 /* If SSE is disabled, squash the registers. */
4429 if (! TARGET_SSE)
4430 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4431 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4432 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4434 /* If the FPU is disabled, squash the registers. */
4435 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4436 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4437 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4438 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4440 /* If AVX512F is disabled, squash the registers. */
4441 if (! TARGET_AVX512F)
4443 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4444 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4446 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4447 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4450 /* If MPX is disabled, squash the registers. */
4451 if (! TARGET_MPX)
4452 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4453 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4457 /* Save the current options */
4459 static void
4460 ix86_function_specific_save (struct cl_target_option *ptr,
4461 struct gcc_options *opts)
4463 ptr->arch = ix86_arch;
4464 ptr->schedule = ix86_schedule;
4465 ptr->prefetch_sse = x86_prefetch_sse;
4466 ptr->tune = ix86_tune;
4467 ptr->branch_cost = ix86_branch_cost;
4468 ptr->tune_defaulted = ix86_tune_defaulted;
4469 ptr->arch_specified = ix86_arch_specified;
4470 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4471 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4472 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4473 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4474 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4475 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4476 ptr->x_ix86_abi = opts->x_ix86_abi;
4477 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4478 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4479 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4480 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4481 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4482 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4483 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4484 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4485 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4486 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4487 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4488 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4489 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4490 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4491 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4492 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4493 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4494 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4495 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4496 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4498 /* The fields are char but the variables are not; make sure the
4499 values fit in the fields. */
4500 gcc_assert (ptr->arch == ix86_arch);
4501 gcc_assert (ptr->schedule == ix86_schedule);
4502 gcc_assert (ptr->tune == ix86_tune);
4503 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4506 /* Restore the current options */
4508 static void
4509 ix86_function_specific_restore (struct gcc_options *opts,
4510 struct cl_target_option *ptr)
4512 enum processor_type old_tune = ix86_tune;
4513 enum processor_type old_arch = ix86_arch;
4514 unsigned int ix86_arch_mask;
4515 int i;
4517 /* We don't change -fPIC. */
4518 opts->x_flag_pic = flag_pic;
4520 ix86_arch = (enum processor_type) ptr->arch;
4521 ix86_schedule = (enum attr_cpu) ptr->schedule;
4522 ix86_tune = (enum processor_type) ptr->tune;
4523 x86_prefetch_sse = ptr->prefetch_sse;
4524 opts->x_ix86_branch_cost = ptr->branch_cost;
4525 ix86_tune_defaulted = ptr->tune_defaulted;
4526 ix86_arch_specified = ptr->arch_specified;
4527 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4528 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4529 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4530 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4531 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4532 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4533 opts->x_ix86_abi = ptr->x_ix86_abi;
4534 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4535 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4536 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4537 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4538 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4539 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4540 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4541 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4542 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4543 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4544 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4545 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4546 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4547 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4548 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4549 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4550 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4551 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4552 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4553 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4554 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4555 /* TODO: ix86_cost should be chosen at instruction or function granularity
4556 so that for cold code we use size_cost even in !optimize_size compilation. */
4557 if (opts->x_optimize_size)
4558 ix86_cost = &ix86_size_cost;
4559 else
4560 ix86_cost = ix86_tune_cost;
4562 /* Recreate the arch feature tests if the arch changed */
4563 if (old_arch != ix86_arch)
4565 ix86_arch_mask = 1u << ix86_arch;
4566 for (i = 0; i < X86_ARCH_LAST; ++i)
4567 ix86_arch_features[i]
4568 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4571 /* Recreate the tune optimization tests */
4572 if (old_tune != ix86_tune)
4573 set_ix86_tune_features (ix86_tune, false);
4576 /* Adjust target options after streaming them in. This is mainly about
4577 reconciling them with global options. */
4579 static void
4580 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4582 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4583 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4584 for PIC, or error out. */
4585 if (flag_pic)
4586 switch (ptr->x_ix86_cmodel)
4588 case CM_SMALL:
4589 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4590 break;
4592 case CM_MEDIUM:
4593 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4594 break;
4596 case CM_LARGE:
4597 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4598 break;
4600 case CM_KERNEL:
4601 error ("code model %s does not support PIC mode", "kernel");
4602 break;
4604 default:
4605 break;
4607 else
4608 switch (ptr->x_ix86_cmodel)
4610 case CM_SMALL_PIC:
4611 ptr->x_ix86_cmodel = CM_SMALL;
4612 break;
4614 case CM_MEDIUM_PIC:
4615 ptr->x_ix86_cmodel = CM_MEDIUM;
4616 break;
4618 case CM_LARGE_PIC:
4619 ptr->x_ix86_cmodel = CM_LARGE;
4620 break;
4622 default:
4623 break;
4627 /* Print the current options */
4629 static void
4630 ix86_function_specific_print (FILE *file, int indent,
4631 struct cl_target_option *ptr)
4633 char *target_string
4634 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4635 NULL, NULL, ptr->x_ix86_fpmath, false);
4637 gcc_assert (ptr->arch < PROCESSOR_max);
4638 fprintf (file, "%*sarch = %d (%s)\n",
4639 indent, "",
4640 ptr->arch, processor_target_table[ptr->arch].name);
4642 gcc_assert (ptr->tune < PROCESSOR_max);
4643 fprintf (file, "%*stune = %d (%s)\n",
4644 indent, "",
4645 ptr->tune, processor_target_table[ptr->tune].name);
4647 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4649 if (target_string)
4651 fprintf (file, "%*s%s\n", indent, "", target_string);
4652 free (target_string);
4657 /* Inner function to process the attribute((target(...))): take an argument
4658 and set the current options from the argument. If we have a list,
4659 recursively go over the list. */
4661 static bool
4662 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4663 struct gcc_options *opts,
4664 struct gcc_options *opts_set,
4665 struct gcc_options *enum_opts_set)
4667 char *next_optstr;
4668 bool ret = true;
4670 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4671 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4672 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4673 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4674 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4676 enum ix86_opt_type
4678 ix86_opt_unknown,
4679 ix86_opt_yes,
4680 ix86_opt_no,
4681 ix86_opt_str,
4682 ix86_opt_enum,
4683 ix86_opt_isa
4686 static const struct
4688 const char *string;
4689 size_t len;
4690 enum ix86_opt_type type;
4691 int opt;
4692 int mask;
4693 } attrs[] = {
4694 /* isa options */
4695 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4696 IX86_ATTR_ISA ("abm", OPT_mabm),
4697 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4698 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4699 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4700 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4701 IX86_ATTR_ISA ("aes", OPT_maes),
4702 IX86_ATTR_ISA ("sha", OPT_msha),
4703 IX86_ATTR_ISA ("avx", OPT_mavx),
4704 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4705 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4706 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4707 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4708 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4709 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4710 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4711 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4712 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4713 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4714 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4715 IX86_ATTR_ISA ("sse", OPT_msse),
4716 IX86_ATTR_ISA ("sse2", OPT_msse2),
4717 IX86_ATTR_ISA ("sse3", OPT_msse3),
4718 IX86_ATTR_ISA ("sse4", OPT_msse4),
4719 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4720 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4721 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4722 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4723 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4724 IX86_ATTR_ISA ("fma", OPT_mfma),
4725 IX86_ATTR_ISA ("xop", OPT_mxop),
4726 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4727 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4728 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4729 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4730 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4731 IX86_ATTR_ISA ("hle", OPT_mhle),
4732 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4733 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4734 IX86_ATTR_ISA ("adx", OPT_madx),
4735 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4736 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4737 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4738 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4739 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4740 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4741 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4742 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4743 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4744 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4745 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4747 /* enum options */
4748 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4750 /* string options */
4751 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4752 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4754 /* flag options */
4755 IX86_ATTR_YES ("cld",
4756 OPT_mcld,
4757 MASK_CLD),
4759 IX86_ATTR_NO ("fancy-math-387",
4760 OPT_mfancy_math_387,
4761 MASK_NO_FANCY_MATH_387),
4763 IX86_ATTR_YES ("ieee-fp",
4764 OPT_mieee_fp,
4765 MASK_IEEE_FP),
4767 IX86_ATTR_YES ("inline-all-stringops",
4768 OPT_minline_all_stringops,
4769 MASK_INLINE_ALL_STRINGOPS),
4771 IX86_ATTR_YES ("inline-stringops-dynamically",
4772 OPT_minline_stringops_dynamically,
4773 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4775 IX86_ATTR_NO ("align-stringops",
4776 OPT_mno_align_stringops,
4777 MASK_NO_ALIGN_STRINGOPS),
4779 IX86_ATTR_YES ("recip",
4780 OPT_mrecip,
4781 MASK_RECIP),
4785 /* If this is a list, recurse to get the options. */
4786 if (TREE_CODE (args) == TREE_LIST)
4788 bool ret = true;
4790 for (; args; args = TREE_CHAIN (args))
4791 if (TREE_VALUE (args)
4792 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4793 p_strings, opts, opts_set,
4794 enum_opts_set))
4795 ret = false;
4797 return ret;
4800 else if (TREE_CODE (args) != STRING_CST)
4802 error ("attribute %<target%> argument not a string");
4803 return false;
4806 /* Handle multiple arguments separated by commas. */
4807 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4809 while (next_optstr && *next_optstr != '\0')
4811 char *p = next_optstr;
4812 char *orig_p = p;
4813 char *comma = strchr (next_optstr, ',');
4814 const char *opt_string;
4815 size_t len, opt_len;
4816 int opt;
4817 bool opt_set_p;
4818 char ch;
4819 unsigned i;
4820 enum ix86_opt_type type = ix86_opt_unknown;
4821 int mask = 0;
4823 if (comma)
4825 *comma = '\0';
4826 len = comma - next_optstr;
4827 next_optstr = comma + 1;
4829 else
4831 len = strlen (p);
4832 next_optstr = NULL;
4835 /* Recognize no-xxx. */
4836 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4838 opt_set_p = false;
4839 p += 3;
4840 len -= 3;
4842 else
4843 opt_set_p = true;
4845 /* Find the option. */
4846 ch = *p;
4847 opt = N_OPTS;
4848 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4850 type = attrs[i].type;
4851 opt_len = attrs[i].len;
4852 if (ch == attrs[i].string[0]
4853 && ((type != ix86_opt_str && type != ix86_opt_enum)
4854 ? len == opt_len
4855 : len > opt_len)
4856 && memcmp (p, attrs[i].string, opt_len) == 0)
4858 opt = attrs[i].opt;
4859 mask = attrs[i].mask;
4860 opt_string = attrs[i].string;
4861 break;
4865 /* Process the option. */
4866 if (opt == N_OPTS)
4868 error ("attribute(target(\"%s\")) is unknown", orig_p);
4869 ret = false;
4872 else if (type == ix86_opt_isa)
4874 struct cl_decoded_option decoded;
4876 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4877 ix86_handle_option (opts, opts_set,
4878 &decoded, input_location);
4881 else if (type == ix86_opt_yes || type == ix86_opt_no)
4883 if (type == ix86_opt_no)
4884 opt_set_p = !opt_set_p;
4886 if (opt_set_p)
4887 opts->x_target_flags |= mask;
4888 else
4889 opts->x_target_flags &= ~mask;
4892 else if (type == ix86_opt_str)
4894 if (p_strings[opt])
4896 error ("option(\"%s\") was already specified", opt_string);
4897 ret = false;
4899 else
4900 p_strings[opt] = xstrdup (p + opt_len);
4903 else if (type == ix86_opt_enum)
4905 bool arg_ok;
4906 int value;
4908 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4909 if (arg_ok)
4910 set_option (opts, enum_opts_set, opt, value,
4911 p + opt_len, DK_UNSPECIFIED, input_location,
4912 global_dc);
4913 else
4915 error ("attribute(target(\"%s\")) is unknown", orig_p);
4916 ret = false;
4920 else
4921 gcc_unreachable ();
4924 return ret;
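/* Illustrative example (editorial sketch, not part of GCC): the parser
   above splits the attribute string on commas and understands the "no-"
   prefix, so a single declaration can toggle several ISA flags together
   with string and enum options.  The function name `hypot_fast' is
   hypothetical.  */
extern double hypot_fast (double, double)
  __attribute__ ((target ("sse4.2,no-fma4,arch=core-avx2,fpmath=sse")));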
4927 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4929 tree
4930 ix86_valid_target_attribute_tree (tree args,
4931 struct gcc_options *opts,
4932 struct gcc_options *opts_set)
4934 const char *orig_arch_string = opts->x_ix86_arch_string;
4935 const char *orig_tune_string = opts->x_ix86_tune_string;
4936 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4937 int orig_tune_defaulted = ix86_tune_defaulted;
4938 int orig_arch_specified = ix86_arch_specified;
4939 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4940 tree t = NULL_TREE;
4941 int i;
4942 struct cl_target_option *def
4943 = TREE_TARGET_OPTION (target_option_default_node);
4944 struct gcc_options enum_opts_set;
4946 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4948 /* Process each of the options on the chain. */
4949 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4950 opts_set, &enum_opts_set))
4951 return error_mark_node;
4953 /* If the changed options are different from the default, rerun
4954 ix86_option_override_internal, and then save the options away.
4955 The string options are attribute options, and will be undone
4956 when we copy the save structure. */
4957 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4958 || opts->x_target_flags != def->x_target_flags
4959 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4960 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4961 || enum_opts_set.x_ix86_fpmath)
4963 /* If we are using the default tune= or arch=, undo the string assigned,
4964 and use the default. */
4965 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4966 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4967 else if (!orig_arch_specified)
4968 opts->x_ix86_arch_string = NULL;
4970 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4971 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4972 else if (orig_tune_defaulted)
4973 opts->x_ix86_tune_string = NULL;
4975 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4976 if (enum_opts_set.x_ix86_fpmath)
4977 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4978 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4979 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4981 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4982 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4985 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4986 ix86_option_override_internal (false, opts, opts_set);
4988 /* Add any builtin functions with the new isa if any. */
4989 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4991 /* Save the current options unless we are validating options for
4992 #pragma. */
4993 t = build_target_option_node (opts);
4995 opts->x_ix86_arch_string = orig_arch_string;
4996 opts->x_ix86_tune_string = orig_tune_string;
4997 opts_set->x_ix86_fpmath = orig_fpmath_set;
4999 /* Free up memory allocated to hold the strings */
5000 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5001 free (option_strings[i]);
5004 return t;
5007 /* Hook to validate attribute((target("string"))). */
5009 static bool
5010 ix86_valid_target_attribute_p (tree fndecl,
5011 tree ARG_UNUSED (name),
5012 tree args,
5013 int ARG_UNUSED (flags))
5015 struct gcc_options func_options;
5016 tree new_target, new_optimize;
5017 bool ret = true;
5019 /* attribute((target("default"))) does nothing, beyond
5020 affecting multi-versioning. */
5021 if (TREE_VALUE (args)
5022 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5023 && TREE_CHAIN (args) == NULL_TREE
5024 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5025 return true;
5027 tree old_optimize = build_optimization_node (&global_options);
5029 /* Get the optimization options of the current function. */
5030 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5032 if (!func_optimize)
5033 func_optimize = old_optimize;
5035 /* Init func_options. */
5036 memset (&func_options, 0, sizeof (func_options));
5037 init_options_struct (&func_options, NULL);
5038 lang_hooks.init_options_struct (&func_options);
5040 cl_optimization_restore (&func_options,
5041 TREE_OPTIMIZATION (func_optimize));
5043 /* Initialize func_options to the default before its target options can
5044 be set. */
5045 cl_target_option_restore (&func_options,
5046 TREE_TARGET_OPTION (target_option_default_node));
5048 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5049 &global_options_set);
5051 new_optimize = build_optimization_node (&func_options);
5053 if (new_target == error_mark_node)
5054 ret = false;
5056 else if (fndecl && new_target)
5058 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5060 if (old_optimize != new_optimize)
5061 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5064 return ret;
5068 /* Hook to determine if one function can safely inline another. */
5070 static bool
5071 ix86_can_inline_p (tree caller, tree callee)
5073 bool ret = false;
5074 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5075 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5077 /* If callee has no option attributes, then it is ok to inline. */
5078 if (!callee_tree)
5079 ret = true;
5081 /* If caller has no option attributes, but callee does then it is not ok to
5082 inline. */
5083 else if (!caller_tree)
5084 ret = false;
5086 else
5088 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5089 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5091 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
5092 function can inline an SSE2 function, but an SSE2 function can't inline
5093 an SSE4 function. */
5094 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5095 != callee_opts->x_ix86_isa_flags)
5096 ret = false;
5098 /* See if we have the same non-isa options. */
5099 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5100 ret = false;
5102 /* See if arch, tune, etc. are the same. */
5103 else if (caller_opts->arch != callee_opts->arch)
5104 ret = false;
5106 else if (caller_opts->tune != callee_opts->tune)
5107 ret = false;
5109 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5110 ret = false;
5112 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5113 ret = false;
5115 else
5116 ret = true;
5119 return ret;
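/* Illustrative sketch (editorial addition, not from GCC): under the rules
   above a caller whose ISA flags are a superset of the callee's may inline
   it, but not the other way around.  Names are hypothetical; `basic_add'
   uses the translation unit's default options.  */
static inline int basic_add (int a, int b) { return a + b; }
int __attribute__ ((target ("sse4.2")))
sse42_caller (int a, int b)
{
  return basic_add (a, b);	/* Inlinable: caller ISA is a superset.  */
}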
5123 /* Remember the last target of ix86_set_current_function. */
5124 static GTY(()) tree ix86_previous_fndecl;
5126 /* Set targets globals to the default (or current #pragma GCC target
5127 if active). Invalidate ix86_previous_fndecl cache. */
5129 void
5130 ix86_reset_previous_fndecl (void)
5132 tree new_tree = target_option_current_node;
5133 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5134 if (TREE_TARGET_GLOBALS (new_tree))
5135 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5136 else if (new_tree == target_option_default_node)
5137 restore_target_globals (&default_target_globals);
5138 else
5139 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5140 ix86_previous_fndecl = NULL_TREE;
5143 /* Establish appropriate back-end context for processing the function
5144 FNDECL. The argument might be NULL to indicate processing at top
5145 level, outside of any function scope. */
5146 static void
5147 ix86_set_current_function (tree fndecl)
5149 /* Only change the context if the function changes. This hook is called
5150 several times in the course of compiling a function, and we don't want to
5151 slow things down too much or call target_reinit when it isn't safe. */
5152 if (fndecl == ix86_previous_fndecl)
5153 return;
5155 tree old_tree;
5156 if (ix86_previous_fndecl == NULL_TREE)
5157 old_tree = target_option_current_node;
5158 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5159 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5160 else
5161 old_tree = target_option_default_node;
5163 if (fndecl == NULL_TREE)
5165 if (old_tree != target_option_current_node)
5166 ix86_reset_previous_fndecl ();
5167 return;
5170 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5171 if (new_tree == NULL_TREE)
5172 new_tree = target_option_default_node;
5174 if (old_tree != new_tree)
5176 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5177 if (TREE_TARGET_GLOBALS (new_tree))
5178 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5179 else if (new_tree == target_option_default_node)
5180 restore_target_globals (&default_target_globals);
5181 else
5182 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5184 ix86_previous_fndecl = fndecl;
5188 /* Return true if this goes in large data/bss. */
5190 static bool
5191 ix86_in_large_data_p (tree exp)
5193 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5194 return false;
5196 /* Functions are never large data. */
5197 if (TREE_CODE (exp) == FUNCTION_DECL)
5198 return false;
5200 /* Automatic variables are never large data. */
5201 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5202 return false;
5204 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5206 const char *section = DECL_SECTION_NAME (exp);
5207 if (strcmp (section, ".ldata") == 0
5208 || strcmp (section, ".lbss") == 0)
5209 return true;
5210 return false;
5212 else
5214 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5216 /* If this is an incomplete type with size 0, then we can't put it
5217 in data because it might be too big when completed. Also,
5218 int_size_in_bytes returns -1 if size can vary or is larger than
5219 an integer in which case also it is safer to assume that it goes in
5220 large data. */
5221 if (size <= 0 || size > ix86_section_threshold)
5222 return true;
5225 return false;
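/* Illustrative example (editorial sketch, not part of GCC): with
   -mcmodel=medium and the default -mlarge-data-threshold of 65536, the
   object below exceeds ix86_section_threshold and is therefore treated as
   large data, i.e. placed in .lbss rather than .bss.  The name is
   hypothetical.  */
static char large_scratch_buffer[1 << 20];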
5228 /* Switch to the appropriate section for output of DECL.
5229 DECL is either a `VAR_DECL' node or a constant of some sort.
5230 RELOC indicates whether forming the initial value of DECL requires
5231 link-time relocations. */
5233 ATTRIBUTE_UNUSED static section *
5234 x86_64_elf_select_section (tree decl, int reloc,
5235 unsigned HOST_WIDE_INT align)
5237 if (ix86_in_large_data_p (decl))
5239 const char *sname = NULL;
5240 unsigned int flags = SECTION_WRITE;
5241 switch (categorize_decl_for_section (decl, reloc))
5243 case SECCAT_DATA:
5244 sname = ".ldata";
5245 break;
5246 case SECCAT_DATA_REL:
5247 sname = ".ldata.rel";
5248 break;
5249 case SECCAT_DATA_REL_LOCAL:
5250 sname = ".ldata.rel.local";
5251 break;
5252 case SECCAT_DATA_REL_RO:
5253 sname = ".ldata.rel.ro";
5254 break;
5255 case SECCAT_DATA_REL_RO_LOCAL:
5256 sname = ".ldata.rel.ro.local";
5257 break;
5258 case SECCAT_BSS:
5259 sname = ".lbss";
5260 flags |= SECTION_BSS;
5261 break;
5262 case SECCAT_RODATA:
5263 case SECCAT_RODATA_MERGE_STR:
5264 case SECCAT_RODATA_MERGE_STR_INIT:
5265 case SECCAT_RODATA_MERGE_CONST:
5266 sname = ".lrodata";
5267 flags = 0;
5268 break;
5269 case SECCAT_SRODATA:
5270 case SECCAT_SDATA:
5271 case SECCAT_SBSS:
5272 gcc_unreachable ();
5273 case SECCAT_TEXT:
5274 case SECCAT_TDATA:
5275 case SECCAT_TBSS:
5276 /* We don't split these for the medium model. Place them into
5277 default sections and hope for the best. */
5278 break;
5280 if (sname)
5282 /* We might get called with string constants, but get_named_section
5283 doesn't like them as they are not DECLs. Also, we need to set
5284 flags in that case. */
5285 if (!DECL_P (decl))
5286 return get_section (sname, flags, NULL);
5287 return get_named_section (decl, sname, reloc);
5290 return default_elf_select_section (decl, reloc, align);
5293 /* Select a set of attributes for section NAME based on the properties
5294 of DECL and whether or not RELOC indicates that DECL's initializer
5295 might contain runtime relocations. */
5297 static unsigned int ATTRIBUTE_UNUSED
5298 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5300 unsigned int flags = default_section_type_flags (decl, name, reloc);
5302 if (decl == NULL_TREE
5303 && (strcmp (name, ".ldata.rel.ro") == 0
5304 || strcmp (name, ".ldata.rel.ro.local") == 0))
5305 flags |= SECTION_RELRO;
5307 if (strcmp (name, ".lbss") == 0
5308 || strncmp (name, ".lbss.", 5) == 0
5309 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5310 flags |= SECTION_BSS;
5312 return flags;
5315 /* Build up a unique section name, expressed as a
5316 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5317 RELOC indicates whether the initial value of EXP requires
5318 link-time relocations. */
5320 static void ATTRIBUTE_UNUSED
5321 x86_64_elf_unique_section (tree decl, int reloc)
5323 if (ix86_in_large_data_p (decl))
5325 const char *prefix = NULL;
5326 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5327 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5329 switch (categorize_decl_for_section (decl, reloc))
5331 case SECCAT_DATA:
5332 case SECCAT_DATA_REL:
5333 case SECCAT_DATA_REL_LOCAL:
5334 case SECCAT_DATA_REL_RO:
5335 case SECCAT_DATA_REL_RO_LOCAL:
5336 prefix = one_only ? ".ld" : ".ldata";
5337 break;
5338 case SECCAT_BSS:
5339 prefix = one_only ? ".lb" : ".lbss";
5340 break;
5341 case SECCAT_RODATA:
5342 case SECCAT_RODATA_MERGE_STR:
5343 case SECCAT_RODATA_MERGE_STR_INIT:
5344 case SECCAT_RODATA_MERGE_CONST:
5345 prefix = one_only ? ".lr" : ".lrodata";
5346 break;
5347 case SECCAT_SRODATA:
5348 case SECCAT_SDATA:
5349 case SECCAT_SBSS:
5350 gcc_unreachable ();
5351 case SECCAT_TEXT:
5352 case SECCAT_TDATA:
5353 case SECCAT_TBSS:
5354 /* We don't split these for medium model. Place them into
5355 default sections and hope for best. */
5356 break;
5358 if (prefix)
5360 const char *name, *linkonce;
5361 char *string;
5363 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5364 name = targetm.strip_name_encoding (name);
5366 /* If we're using one_only, then there needs to be a .gnu.linkonce
5367 prefix to the section name. */
5368 linkonce = one_only ? ".gnu.linkonce" : "";
5370 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5372 set_decl_section_name (decl, string);
5373 return;
5376 default_unique_section (decl, reloc);
5379 #ifdef COMMON_ASM_OP
5380 /* This says how to output assembler code to declare an
5381 uninitialized external linkage data object.
5383 For medium model x86-64 we need to use .largecomm opcode for
5384 large objects. */
5385 void
5386 x86_elf_aligned_common (FILE *file,
5387 const char *name, unsigned HOST_WIDE_INT size,
5388 int align)
5390 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5391 && size > (unsigned int)ix86_section_threshold)
5392 fputs ("\t.largecomm\t", file);
5393 else
5394 fputs (COMMON_ASM_OP, file);
5395 assemble_name (file, name);
5396 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5397 size, align / BITS_PER_UNIT);
5399 #endif
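/* Illustrative example (editorial sketch, not part of GCC): under the
   medium code model a common symbol larger than the section threshold is
   announced with .largecomm instead of COMMON_ASM_OP, so the tentative
   definition below would be emitted roughly as
       .largecomm	shared_pool,1048576,32
   The name and the exact alignment are hypothetical.  */
char shared_pool[1 << 20];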
5401 /* Utility function for targets to use in implementing
5402 ASM_OUTPUT_ALIGNED_BSS. */
5404 void
5405 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5406 unsigned HOST_WIDE_INT size, int align)
5408 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5409 && size > (unsigned int)ix86_section_threshold)
5410 switch_to_section (get_named_section (decl, ".lbss", 0));
5411 else
5412 switch_to_section (bss_section);
5413 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5414 #ifdef ASM_DECLARE_OBJECT_NAME
5415 last_assemble_variable_decl = decl;
5416 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5417 #else
5418 /* Standard thing is just output label for the object. */
5419 ASM_OUTPUT_LABEL (file, name);
5420 #endif /* ASM_DECLARE_OBJECT_NAME */
5421 ASM_OUTPUT_SKIP (file, size ? size : 1);
5424 /* Decide whether we must probe the stack before any space allocation
5425 on this target. It's essentially TARGET_STACK_PROBE except when
5426 -fstack-check causes the stack to be already probed differently. */
5428 bool
5429 ix86_target_stack_probe (void)
5431 /* Do not probe the stack twice if static stack checking is enabled. */
5432 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5433 return false;
5435 return TARGET_STACK_PROBE;
5438 /* Decide whether we can make a sibling call to a function. DECL is the
5439 declaration of the function being targeted by the call and EXP is the
5440 CALL_EXPR representing the call. */
5442 static bool
5443 ix86_function_ok_for_sibcall (tree decl, tree exp)
5445 tree type, decl_or_type;
5446 rtx a, b;
5448 /* If we are generating position-independent code, we cannot sibcall
5449 optimize any indirect call, or a direct call to a global function,
5450 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5451 if (!TARGET_MACHO
5452 && !TARGET_64BIT
5453 && flag_pic
5454 && (!decl || !targetm.binds_local_p (decl)))
5455 return false;
5457 /* If we need to align the outgoing stack, then sibcalling would
5458 unalign the stack, which may break the called function. */
5459 if (ix86_minimum_incoming_stack_boundary (true)
5460 < PREFERRED_STACK_BOUNDARY)
5461 return false;
5463 if (decl)
5465 decl_or_type = decl;
5466 type = TREE_TYPE (decl);
5468 else
5470 /* We're looking at the CALL_EXPR, we need the type of the function. */
5471 type = CALL_EXPR_FN (exp); /* pointer expression */
5472 type = TREE_TYPE (type); /* pointer type */
5473 type = TREE_TYPE (type); /* function type */
5474 decl_or_type = type;
5477 /* Check that the return value locations are the same. Like
5478 if we are returning floats on the 80387 register stack, we cannot
5479 make a sibcall from a function that doesn't return a float to a
5480 function that does or, conversely, from a function that does return
5481 a float to a function that doesn't; the necessary stack adjustment
5482 would not be executed. This is also the place we notice
5483 differences in the return value ABI. Note that it is ok for one
5484 of the functions to have void return type as long as the return
5485 value of the other is passed in a register. */
5486 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5487 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5488 cfun->decl, false);
5489 if (STACK_REG_P (a) || STACK_REG_P (b))
5491 if (!rtx_equal_p (a, b))
5492 return false;
5494 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5496 else if (!rtx_equal_p (a, b))
5497 return false;
5499 if (TARGET_64BIT)
5501 /* The SYSV ABI has more call-clobbered registers;
5502 disallow sibcalls from MS to SYSV. */
5503 if (cfun->machine->call_abi == MS_ABI
5504 && ix86_function_type_abi (type) == SYSV_ABI)
5505 return false;
5507 else
5509 /* If this call is indirect, we'll need to be able to use a
5510 call-clobbered register for the address of the target function.
5511 Make sure that all such registers are not used for passing
5512 parameters. Note that DLLIMPORT functions are indirect. */
5513 if (!decl
5514 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5516 if (ix86_function_regparm (type, NULL) >= 3)
5518 /* ??? Need to count the actual number of registers to be used,
5519 not the possible number of registers. Fix later. */
5520 return false;
5525 /* Otherwise okay. That also includes certain types of indirect calls. */
5526 return true;
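/* Illustrative example (editorial sketch, not part of GCC): in 32-bit PIC
   code the tail call below is not turned into a sibcall when
   `external_callee' does not bind locally, because the call through the
   PLT needs %ebx live; without -fPIC, or with a local callee, it can be.
   Names are hypothetical.  */
extern int external_callee (int);
int
tail_wrapper (int x)
{
  return external_callee (x);
}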
5529 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5530 and "sseregparm" calling convention attributes;
5531 arguments as in struct attribute_spec.handler. */
5533 static tree
5534 ix86_handle_cconv_attribute (tree *node, tree name,
5535 tree args,
5536 int,
5537 bool *no_add_attrs)
5539 if (TREE_CODE (*node) != FUNCTION_TYPE
5540 && TREE_CODE (*node) != METHOD_TYPE
5541 && TREE_CODE (*node) != FIELD_DECL
5542 && TREE_CODE (*node) != TYPE_DECL)
5544 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5545 name);
5546 *no_add_attrs = true;
5547 return NULL_TREE;
5550 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5551 if (is_attribute_p ("regparm", name))
5553 tree cst;
5555 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5557 error ("fastcall and regparm attributes are not compatible");
5560 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5562 error ("regparm and thiscall attributes are not compatible");
5565 cst = TREE_VALUE (args);
5566 if (TREE_CODE (cst) != INTEGER_CST)
5568 warning (OPT_Wattributes,
5569 "%qE attribute requires an integer constant argument",
5570 name);
5571 *no_add_attrs = true;
5573 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5575 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5576 name, REGPARM_MAX);
5577 *no_add_attrs = true;
5580 return NULL_TREE;
5583 if (TARGET_64BIT)
5585 /* Do not warn when emulating the MS ABI. */
5586 if ((TREE_CODE (*node) != FUNCTION_TYPE
5587 && TREE_CODE (*node) != METHOD_TYPE)
5588 || ix86_function_type_abi (*node) != MS_ABI)
5589 warning (OPT_Wattributes, "%qE attribute ignored",
5590 name);
5591 *no_add_attrs = true;
5592 return NULL_TREE;
5595 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5596 if (is_attribute_p ("fastcall", name))
5598 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5600 error ("fastcall and cdecl attributes are not compatible");
5602 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5604 error ("fastcall and stdcall attributes are not compatible");
5606 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5608 error ("fastcall and regparm attributes are not compatible");
5610 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5612 error ("fastcall and thiscall attributes are not compatible");
5616 /* Can combine stdcall with fastcall (redundant), regparm and
5617 sseregparm. */
5618 else if (is_attribute_p ("stdcall", name))
5620 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5622 error ("stdcall and cdecl attributes are not compatible");
5624 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5626 error ("stdcall and fastcall attributes are not compatible");
5628 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5630 error ("stdcall and thiscall attributes are not compatible");
5634 /* Can combine cdecl with regparm and sseregparm. */
5635 else if (is_attribute_p ("cdecl", name))
5637 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5639 error ("stdcall and cdecl attributes are not compatible");
5641 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5643 error ("fastcall and cdecl attributes are not compatible");
5645 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5647 error ("cdecl and thiscall attributes are not compatible");
5650 else if (is_attribute_p ("thiscall", name))
5652 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5653 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5654 name);
5655 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5657 error ("stdcall and thiscall attributes are not compatible");
5659 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5661 error ("fastcall and thiscall attributes are not compatible");
5663 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5665 error ("cdecl and thiscall attributes are not compatible");
5669 /* Can combine sseregparm with all attributes. */
5671 return NULL_TREE;
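/* Illustrative example (editorial sketch, not part of GCC): on 32-bit
   targets `stdcall' combines with `regparm' and `sseregparm', while
   `fastcall' and `stdcall' together are rejected by the handler above.
   Names are hypothetical.  */
int __attribute__ ((stdcall, regparm (2))) accepted_combo (int a, int b);
/* int __attribute__ ((fastcall, stdcall)) rejected_combo (int a);
   would draw "fastcall and stdcall attributes are not compatible".  */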
5674 /* The transactional memory builtins are implicitly regparm or fastcall
5675 depending on the ABI. Override the generic do-nothing attribute that
5676 these builtins were declared with, and replace it with one of the two
5677 attributes that we expect elsewhere. */
5679 static tree
5680 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5681 int flags, bool *no_add_attrs)
5683 tree alt;
5685 /* In no case do we want to add the placeholder attribute. */
5686 *no_add_attrs = true;
5688 /* The 64-bit ABI is unchanged for transactional memory. */
5689 if (TARGET_64BIT)
5690 return NULL_TREE;
5692 /* ??? Is there a better way to validate 32-bit windows? We have
5693 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5694 if (CHECK_STACK_LIMIT > 0)
5695 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5696 else
5698 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5699 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5701 decl_attributes (node, alt, flags);
5703 return NULL_TREE;
5706 /* This function determines from TYPE the calling-convention. */
5708 unsigned int
5709 ix86_get_callcvt (const_tree type)
5711 unsigned int ret = 0;
5712 bool is_stdarg;
5713 tree attrs;
5715 if (TARGET_64BIT)
5716 return IX86_CALLCVT_CDECL;
5718 attrs = TYPE_ATTRIBUTES (type);
5719 if (attrs != NULL_TREE)
5721 if (lookup_attribute ("cdecl", attrs))
5722 ret |= IX86_CALLCVT_CDECL;
5723 else if (lookup_attribute ("stdcall", attrs))
5724 ret |= IX86_CALLCVT_STDCALL;
5725 else if (lookup_attribute ("fastcall", attrs))
5726 ret |= IX86_CALLCVT_FASTCALL;
5727 else if (lookup_attribute ("thiscall", attrs))
5728 ret |= IX86_CALLCVT_THISCALL;
5730 /* Regparm isn't allowed for thiscall and fastcall. */
5731 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5733 if (lookup_attribute ("regparm", attrs))
5734 ret |= IX86_CALLCVT_REGPARM;
5735 if (lookup_attribute ("sseregparm", attrs))
5736 ret |= IX86_CALLCVT_SSEREGPARM;
5739 if (IX86_BASE_CALLCVT(ret) != 0)
5740 return ret;
5743 is_stdarg = stdarg_p (type);
5744 if (TARGET_RTD && !is_stdarg)
5745 return IX86_CALLCVT_STDCALL | ret;
5747 if (ret != 0
5748 || is_stdarg
5749 || TREE_CODE (type) != METHOD_TYPE
5750 || ix86_function_type_abi (type) != MS_ABI)
5751 return IX86_CALLCVT_CDECL | ret;
5753 return IX86_CALLCVT_THISCALL;
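/* Illustrative example (editorial sketch, not part of GCC): with -mrtd a
   prototyped, non-variadic 32-bit function defaults to the callee-pops
   stdcall convention, while a variadic one stays cdecl.  Names are
   hypothetical.  */
extern int fixed_arity (int a, int b);		/* -mrtd: IX86_CALLCVT_STDCALL.  */
extern int printf_like (const char *fmt, ...);	/* stdarg: stays IX86_CALLCVT_CDECL.  */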
5756 /* Return 0 if the attributes for two types are incompatible, 1 if they
5757 are compatible, and 2 if they are nearly compatible (which causes a
5758 warning to be generated). */
5760 static int
5761 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5763 unsigned int ccvt1, ccvt2;
5765 if (TREE_CODE (type1) != FUNCTION_TYPE
5766 && TREE_CODE (type1) != METHOD_TYPE)
5767 return 1;
5769 ccvt1 = ix86_get_callcvt (type1);
5770 ccvt2 = ix86_get_callcvt (type2);
5771 if (ccvt1 != ccvt2)
5772 return 0;
5773 if (ix86_function_regparm (type1, NULL)
5774 != ix86_function_regparm (type2, NULL))
5775 return 0;
5777 return 1;
5780 /* Return the regparm value for a function with the indicated TYPE and DECL.
5781 DECL may be NULL when calling function indirectly
5782 or considering a libcall. */
5784 static int
5785 ix86_function_regparm (const_tree type, const_tree decl)
5787 tree attr;
5788 int regparm;
5789 unsigned int ccvt;
5791 if (TARGET_64BIT)
5792 return (ix86_function_type_abi (type) == SYSV_ABI
5793 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5794 ccvt = ix86_get_callcvt (type);
5795 regparm = ix86_regparm;
5797 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5799 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5800 if (attr)
5802 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5803 return regparm;
5806 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5807 return 2;
5808 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5809 return 1;
5811 /* Use register calling convention for local functions when possible. */
5812 if (decl
5813 && TREE_CODE (decl) == FUNCTION_DECL)
5815 cgraph_node *target = cgraph_node::get (decl);
5816 if (target)
5817 target = target->function_symbol ();
5819 /* Caller and callee must agree on the calling convention, so
5820 checking just the current function's optimize setting here would mean
5821 that with __attribute__((optimize (...))) the caller could use the
5822 regparm convention and the callee not, or vice versa. Instead look at
5823 whether the callee is optimized or not. */
5824 if (target && opt_for_fn (target->decl, optimize)
5825 && !(profile_flag && !flag_fentry))
5827 cgraph_local_info *i = &target->local;
5828 if (i && i->local && i->can_change_signature)
5830 int local_regparm, globals = 0, regno;
5832 /* Make sure no regparm register is taken by a
5833 fixed register variable. */
5834 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5835 local_regparm++)
5836 if (fixed_regs[local_regparm])
5837 break;
5839 /* We don't want to use regparm(3) for nested functions as
5840 these use a static chain pointer in the third argument. */
5841 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5842 local_regparm = 2;
5844 /* Save a register for the split stack. */
5845 if (local_regparm == 3 && flag_split_stack)
5846 local_regparm = 2;
5848 /* Each fixed register usage increases register pressure,
5849 so fewer registers should be used for argument passing.
5850 This functionality can be overridden by an explicit
5851 regparm value. */
5852 for (regno = AX_REG; regno <= DI_REG; regno++)
5853 if (fixed_regs[regno])
5854 globals++;
5856 local_regparm
5857 = globals < local_regparm ? local_regparm - globals : 0;
5859 if (local_regparm > regparm)
5860 regparm = local_regparm;
5865 return regparm;
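/* Illustrative example (editorial sketch, not part of GCC): for the 32-bit
   declaration below ix86_function_regparm returns 3, so the three integer
   arguments are passed in %eax, %edx and %ecx.  The name is
   hypothetical.  */
int __attribute__ ((regparm (3))) mul_add (int a, int b, int c);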
5868 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5869 DFmode (2) arguments in SSE registers for a function with the
5870 indicated TYPE and DECL. DECL may be NULL when calling function
5871 indirectly or considering a libcall. Otherwise return 0. */
5873 static int
5874 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5876 gcc_assert (!TARGET_64BIT);
5878 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5879 by the sseregparm attribute. */
5880 if (TARGET_SSEREGPARM
5881 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5883 if (!TARGET_SSE)
5885 if (warn)
5887 if (decl)
5888 error ("calling %qD with attribute sseregparm without "
5889 "SSE/SSE2 enabled", decl);
5890 else
5891 error ("calling %qT with attribute sseregparm without "
5892 "SSE/SSE2 enabled", type);
5894 return 0;
5897 return 2;
5900 if (!decl)
5901 return 0;
5903 cgraph_node *target = cgraph_node::get (decl);
5904 if (target)
5905 target = target->function_symbol ();
5907 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5908 (and DFmode for SSE2) arguments in SSE registers. */
5909 if (target
5910 /* TARGET_SSE_MATH */
5911 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5912 && opt_for_fn (target->decl, optimize)
5913 && !(profile_flag && !flag_fentry))
5915 cgraph_local_info *i = &target->local;
5916 if (i && i->local && i->can_change_signature)
5918 /* Refuse to produce wrong code when a local function with SSE enabled
5919 is called from an SSE-disabled function.
5920 We could work harder to handle these scenarios, but hopefully
5921 it does not matter in practice. */
5922 if (!TARGET_SSE && warn)
5924 error ("calling %qD with SSE calling convention without "
5925 "SSE/SSE2 enabled", decl);
5926 return 0;
5928 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5929 ->x_ix86_isa_flags) ? 2 : 1;
5933 return 0;
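/* Illustrative example (editorial sketch, not part of GCC): with SSE/SSE2
   enabled on a 32-bit target, the attribute below makes the SFmode and
   DFmode arguments arrive in SSE registers instead of on the stack, which
   is what the return value 2 above requests.  The name is
   hypothetical.  */
double __attribute__ ((sseregparm)) scale (double x, float s);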
5936 /* Return true if EAX is live at the start of the function. Used by
5937 ix86_expand_prologue to determine if we need special help before
5938 calling allocate_stack_worker. */
5940 static bool
5941 ix86_eax_live_at_start_p (void)
5943 /* Cheat. Don't bother working forward from ix86_function_regparm
5944 to the function type to whether an actual argument is located in
5945 eax. Instead just look at cfg info, which is still close enough
5946 to correct at this point. This gives false positives for broken
5947 functions that might use uninitialized data that happens to be
5948 allocated in eax, but who cares? */
5949 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5952 static bool
5953 ix86_keep_aggregate_return_pointer (tree fntype)
5955 tree attr;
5957 if (!TARGET_64BIT)
5959 attr = lookup_attribute ("callee_pop_aggregate_return",
5960 TYPE_ATTRIBUTES (fntype));
5961 if (attr)
5962 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5964 /* For 32-bit MS-ABI the default is to keep aggregate
5965 return pointer. */
5966 if (ix86_function_type_abi (fntype) == MS_ABI)
5967 return true;
5969 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5972 /* Value is the number of bytes of arguments automatically
5973 popped when returning from a subroutine call.
5974 FUNDECL is the declaration node of the function (as a tree),
5975 FUNTYPE is the data type of the function (as a tree),
5976 or for a library call it is an identifier node for the subroutine name.
5977 SIZE is the number of bytes of arguments passed on the stack.
5979 On the 80386, the RTD insn may be used to pop them if the number
5980 of args is fixed, but if the number is variable then the caller
5981 must pop them all. RTD can't be used for library calls now
5982 because the library is compiled with the Unix compiler.
5983 Use of RTD is a selectable option, since it is incompatible with
5984 standard Unix calling sequences. If the option is not selected,
5985 the caller must always pop the args.
5987 The attribute stdcall is equivalent to RTD on a per module basis. */
5989 static int
5990 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5992 unsigned int ccvt;
5994 /* None of the 64-bit ABIs pop arguments. */
5995 if (TARGET_64BIT)
5996 return 0;
5998 ccvt = ix86_get_callcvt (funtype);
6000 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6001 | IX86_CALLCVT_THISCALL)) != 0
6002 && ! stdarg_p (funtype))
6003 return size;
6005 /* Lose any fake structure return argument if it is passed on the stack. */
6006 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6007 && !ix86_keep_aggregate_return_pointer (funtype))
6009 int nregs = ix86_function_regparm (funtype, fundecl);
6010 if (nregs == 0)
6011 return GET_MODE_SIZE (Pmode);
6014 return 0;
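/* Illustrative example (editorial sketch, not part of GCC): for the 32-bit
   stdcall declaration below ix86_return_pops_args returns 8, i.e. the
   callee pops its two stack words, typically with a `ret $8' instruction.
   The name is hypothetical.  */
int __attribute__ ((stdcall)) pair_sum (int a, int b);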
6017 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6019 static bool
6020 ix86_legitimate_combined_insn (rtx_insn *insn)
6022 /* Check operand constraints in case hard registers were propagated
6023 into insn pattern. This check prevents combine pass from
6024 generating insn patterns with invalid hard register operands.
6025 These invalid insns can eventually confuse reload to error out
6026 with a spill failure. See also PRs 46829 and 46843. */
6027 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6029 int i;
6031 extract_insn (insn);
6032 preprocess_constraints (insn);
6034 int n_operands = recog_data.n_operands;
6035 int n_alternatives = recog_data.n_alternatives;
6036 for (i = 0; i < n_operands; i++)
6038 rtx op = recog_data.operand[i];
6039 machine_mode mode = GET_MODE (op);
6040 const operand_alternative *op_alt;
6041 int offset = 0;
6042 bool win;
6043 int j;
6045 /* For pre-AVX disallow unaligned loads/stores where the
6046 instructions don't support it. */
6047 if (!TARGET_AVX
6048 && VECTOR_MODE_P (GET_MODE (op))
6049 && misaligned_operand (op, GET_MODE (op)))
6051 int min_align = get_attr_ssememalign (insn);
6052 if (min_align == 0)
6053 return false;
6056 /* A unary operator may be accepted by the predicate, but it
6057 is irrelevant for matching constraints. */
6058 if (UNARY_P (op))
6059 op = XEXP (op, 0);
6061 if (GET_CODE (op) == SUBREG)
6063 if (REG_P (SUBREG_REG (op))
6064 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6065 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6066 GET_MODE (SUBREG_REG (op)),
6067 SUBREG_BYTE (op),
6068 GET_MODE (op));
6069 op = SUBREG_REG (op);
6072 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6073 continue;
6075 op_alt = recog_op_alt;
6077 /* Operand has no constraints, anything is OK. */
6078 win = !n_alternatives;
6080 alternative_mask preferred = get_preferred_alternatives (insn);
6081 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6083 if (!TEST_BIT (preferred, j))
6084 continue;
6085 if (op_alt[i].anything_ok
6086 || (op_alt[i].matches != -1
6087 && operands_match_p
6088 (recog_data.operand[i],
6089 recog_data.operand[op_alt[i].matches]))
6090 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6092 win = true;
6093 break;
6097 if (!win)
6098 return false;
6102 return true;
6105 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6107 static unsigned HOST_WIDE_INT
6108 ix86_asan_shadow_offset (void)
6110 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6111 : HOST_WIDE_INT_C (0x7fff8000))
6112 : (HOST_WIDE_INT_1 << 29);
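/* Illustrative sketch (editorial addition, not from GCC): AddressSanitizer
   maps an application address to its shadow byte as (addr >> 3) + offset,
   so the LP64 Linux offset returned above places the shadow of ADDR at
   0x7fff8000 + (ADDR >> 3).  The helper name is hypothetical.  */
static inline unsigned long long
example_asan_shadow_address (unsigned long long addr)
{
  return (addr >> 3) + 0x7fff8000ULL;
}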
6115 /* Argument support functions. */
6117 /* Return true when register may be used to pass function parameters. */
6118 bool
6119 ix86_function_arg_regno_p (int regno)
6121 int i;
6122 const int *parm_regs;
6124 if (TARGET_MPX && BND_REGNO_P (regno))
6125 return true;
6127 if (!TARGET_64BIT)
6129 if (TARGET_MACHO)
6130 return (regno < REGPARM_MAX
6131 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6132 else
6133 return (regno < REGPARM_MAX
6134 || (TARGET_MMX && MMX_REGNO_P (regno)
6135 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6136 || (TARGET_SSE && SSE_REGNO_P (regno)
6137 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6140 if (TARGET_SSE && SSE_REGNO_P (regno)
6141 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6142 return true;
6144 /* TODO: The function should depend on the current function's ABI, but
6145 builtins.c would need updating then. Therefore we use the
6146 default ABI. */
6148 /* RAX is used as hidden argument to va_arg functions. */
6149 if (ix86_abi == SYSV_ABI && regno == AX_REG)
6150 return true;
6152 if (ix86_abi == MS_ABI)
6153 parm_regs = x86_64_ms_abi_int_parameter_registers;
6154 else
6155 parm_regs = x86_64_int_parameter_registers;
6156 for (i = 0; i < (ix86_abi == MS_ABI
6157 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6158 if (regno == parm_regs[i])
6159 return true;
6160 return false;
6163 /* Return true if we do not know how to pass TYPE solely in registers. */
6165 static bool
6166 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6168 if (must_pass_in_stack_var_size_or_pad (mode, type))
6169 return true;
6171 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6172 The layout_type routine is crafty and tries to trick us into passing
6173 currently unsupported vector types on the stack by using TImode. */
6174 return (!TARGET_64BIT && mode == TImode
6175 && type && TREE_CODE (type) != VECTOR_TYPE);
6178 /* Return the size, in bytes, of the area reserved for arguments passed
6179 in registers for the function represented by FNDECL, depending on the
6180 ABI used. */
6181 int
6182 ix86_reg_parm_stack_space (const_tree fndecl)
6184 enum calling_abi call_abi = SYSV_ABI;
6185 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6186 call_abi = ix86_function_abi (fndecl);
6187 else
6188 call_abi = ix86_function_type_abi (fndecl);
6189 if (TARGET_64BIT && call_abi == MS_ABI)
6190 return 32;
6191 return 0;
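/* Illustrative example (editorial sketch, not part of GCC): for a 64-bit
   MS-ABI callee the 32 bytes returned above are the caller-allocated
   "home" area for the four register parameters, so even a call to the
   fully register-passed function below reserves 32 bytes of stack.  The
   name is hypothetical.  */
long long __attribute__ ((ms_abi)) msabi_add (long long a, long long b);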
6194 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6195 call ABI used. */
6196 enum calling_abi
6197 ix86_function_type_abi (const_tree fntype)
6199 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6201 enum calling_abi abi = ix86_abi;
6202 if (abi == SYSV_ABI)
6204 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6206 if (TARGET_X32)
6208 static bool warned = false;
6209 if (!warned)
6211 error ("X32 does not support ms_abi attribute");
6212 warned = true;
6215 abi = MS_ABI;
6218 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6219 abi = SYSV_ABI;
6220 return abi;
6222 return ix86_abi;
6225 /* We add this as a workaround in order to use libc_has_function
6226 hook in i386.md. */
6227 bool
6228 ix86_libc_has_function (enum function_class fn_class)
6230 return targetm.libc_has_function (fn_class);
6233 static bool
6234 ix86_function_ms_hook_prologue (const_tree fn)
6236 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6238 if (decl_function_context (fn) != NULL_TREE)
6239 error_at (DECL_SOURCE_LOCATION (fn),
6240 "ms_hook_prologue is not compatible with nested function");
6241 else
6242 return true;
6244 return false;
6247 static enum calling_abi
6248 ix86_function_abi (const_tree fndecl)
6250 if (! fndecl)
6251 return ix86_abi;
6252 return ix86_function_type_abi (TREE_TYPE (fndecl));
6255 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6256 call ABI used. */
6257 enum calling_abi
6258 ix86_cfun_abi (void)
6260 if (! cfun)
6261 return ix86_abi;
6262 return cfun->machine->call_abi;
6265 /* Write the extra assembler code needed to declare a function properly. */
6267 void
6268 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6269 tree decl)
6271 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6273 if (is_ms_hook)
6275 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6276 unsigned int filler_cc = 0xcccccccc;
6278 for (i = 0; i < filler_count; i += 4)
6279 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6282 #ifdef SUBTARGET_ASM_UNWIND_INIT
6283 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6284 #endif
6286 ASM_OUTPUT_LABEL (asm_out_file, fname);
6288 /* Output magic byte marker, if hot-patch attribute is set. */
6289 if (is_ms_hook)
6291 if (TARGET_64BIT)
6293 /* leaq [%rsp + 0], %rsp */
6294 asm_fprintf (asm_out_file, ASM_BYTE
6295 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6297 else
6299 /* movl.s %edi, %edi
6300 push %ebp
6301 movl.s %esp, %ebp */
6302 asm_fprintf (asm_out_file, ASM_BYTE
6303 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6308 /* regclass.c */
6309 extern void init_regs (void);
6311 /* Implementation of the call ABI switching target hook. The call
6312 register sets specific to FNDECL are set up. See also
6313 ix86_conditional_register_usage for more details. */
6314 void
6315 ix86_call_abi_override (const_tree fndecl)
6317 if (fndecl == NULL_TREE)
6318 cfun->machine->call_abi = ix86_abi;
6319 else
6320 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6323 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6324 Avoid expensive re-initialization of init_regs each time we switch function
6325 context, since this is needed only during RTL expansion. */
6326 static void
6327 ix86_maybe_switch_abi (void)
6329 if (TARGET_64BIT &&
6330 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6331 reinit_regs ();
6334 /* Return true if a pseudo register should be created and used to hold
6335 the GOT address for PIC code. */
6336 bool
6337 ix86_use_pseudo_pic_reg (void)
6339 if ((TARGET_64BIT
6340 && (ix86_cmodel == CM_SMALL_PIC
6341 || TARGET_PECOFF))
6342 || !flag_pic)
6343 return false;
6344 return true;
6347 /* Initialize large model PIC register. */
6349 static void
6350 ix86_init_large_pic_reg (unsigned int tmp_regno)
6352 rtx_code_label *label;
6353 rtx tmp_reg;
6355 gcc_assert (Pmode == DImode);
6356 label = gen_label_rtx ();
6357 emit_label (label);
6358 LABEL_PRESERVE_P (label) = 1;
6359 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6360 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6361 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6362 label));
6363 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6364 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6365 pic_offset_table_rtx, tmp_reg));
6368 /* Create and initialize PIC register if required. */
6369 static void
6370 ix86_init_pic_reg (void)
6372 edge entry_edge;
6373 rtx_insn *seq;
6375 if (!ix86_use_pseudo_pic_reg ())
6376 return;
6378 start_sequence ();
6380 if (TARGET_64BIT)
6382 if (ix86_cmodel == CM_LARGE_PIC)
6383 ix86_init_large_pic_reg (R11_REG);
6384 else
6385 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6387 else
6389 /* If there is a future mcount call in the function, it is more profitable
6390 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6391 rtx reg = crtl->profile
6392 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6393 : pic_offset_table_rtx;
6394 rtx insn = emit_insn (gen_set_got (reg));
6395 RTX_FRAME_RELATED_P (insn) = 1;
6396 if (crtl->profile)
6397 emit_move_insn (pic_offset_table_rtx, reg);
6398 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6401 seq = get_insns ();
6402 end_sequence ();
6404 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6405 insert_insn_on_edge (seq, entry_edge);
6406 commit_one_edge_insertion (entry_edge);
6409 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6410 for a call to a function whose data type is FNTYPE.
6411 For a library call, FNTYPE is 0. */
6413 void
6414 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6415 tree fntype, /* tree ptr for function decl */
6416 rtx libname, /* SYMBOL_REF of library name or 0 */
6417 tree fndecl,
6418 int caller)
6420 struct cgraph_local_info *i = NULL;
6421 struct cgraph_node *target = NULL;
6423 memset (cum, 0, sizeof (*cum));
6425 if (fndecl)
6427 target = cgraph_node::get (fndecl);
6428 if (target)
6430 target = target->function_symbol ();
6431 i = cgraph_node::local_info (target->decl);
6432 cum->call_abi = ix86_function_abi (target->decl);
6434 else
6435 cum->call_abi = ix86_function_abi (fndecl);
6437 else
6438 cum->call_abi = ix86_function_type_abi (fntype);
6440 cum->caller = caller;
6442 /* Set up the number of registers to use for passing arguments. */
6443 cum->nregs = ix86_regparm;
6444 if (TARGET_64BIT)
6446 cum->nregs = (cum->call_abi == SYSV_ABI
6447 ? X86_64_REGPARM_MAX
6448 : X86_64_MS_REGPARM_MAX);
6450 if (TARGET_SSE)
6452 cum->sse_nregs = SSE_REGPARM_MAX;
6453 if (TARGET_64BIT)
6455 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6456 ? X86_64_SSE_REGPARM_MAX
6457 : X86_64_MS_SSE_REGPARM_MAX);
6460 if (TARGET_MMX)
6461 cum->mmx_nregs = MMX_REGPARM_MAX;
6462 cum->warn_avx512f = true;
6463 cum->warn_avx = true;
6464 cum->warn_sse = true;
6465 cum->warn_mmx = true;
6467 /* Because the type might mismatch between caller and callee, we need to
6468 use the actual type of the function for local calls.
6469 FIXME: cgraph_analyze can be told to actually record whether the function
6470 uses va_start, so for local functions maybe_vaarg can be made aggressive,
6471 helping K&R code.
6472 FIXME: once the type system is fixed, we won't need this code anymore. */
6473 if (i && i->local && i->can_change_signature)
6474 fntype = TREE_TYPE (target->decl);
6475 cum->stdarg = stdarg_p (fntype);
6476 cum->maybe_vaarg = (fntype
6477 ? (!prototype_p (fntype) || stdarg_p (fntype))
6478 : !libname);
6480 cum->bnd_regno = FIRST_BND_REG;
6481 cum->bnds_in_bt = 0;
6482 cum->force_bnd_pass = 0;
6484 if (!TARGET_64BIT)
6486 /* If there are variable arguments, then we won't pass anything
6487 in registers in 32-bit mode. */
6488 if (stdarg_p (fntype))
6490 cum->nregs = 0;
6491 cum->sse_nregs = 0;
6492 cum->mmx_nregs = 0;
6493 cum->warn_avx512f = false;
6494 cum->warn_avx = false;
6495 cum->warn_sse = false;
6496 cum->warn_mmx = false;
6497 return;
6500 /* Use ecx and edx registers if function has fastcall attribute,
6501 else look for regparm information. */
6502 if (fntype)
6504 unsigned int ccvt = ix86_get_callcvt (fntype);
6505 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6507 cum->nregs = 1;
6508 cum->fastcall = 1; /* Same first register as in fastcall. */
6510 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6512 cum->nregs = 2;
6513 cum->fastcall = 1;
6515 else
6516 cum->nregs = ix86_function_regparm (fntype, fndecl);
6519 /* Set up the number of SSE registers used for passing SFmode
6520 and DFmode arguments. Warn for mismatching ABI. */
6521 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
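/* A quick illustration of the register-parameter setup above (a sketch;
   the actual register assignment happens later in function_arg_32):

     void __attribute__ ((fastcall)) f (int a, int b, int c);
	nregs == 2: A is passed in %ecx, B in %edx, C on the stack.

     void __attribute__ ((thiscall)) g (void *obj, int a);
	nregs == 1: OBJ is passed in %ecx, A on the stack.

   Without such an attribute, nregs comes from ix86_function_regparm,
   i.e. the regparm attribute or -mregparm.  */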
6525 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6526 But in the case of vector types, it is some vector mode.
6528 When we have only some of our vector isa extensions enabled, then there
6529 are some modes for which vector_mode_supported_p is false. For these
6530 modes, the generic vector support in gcc will choose some non-vector mode
6531 in order to implement the type. By computing the natural mode, we'll
6532 select the proper ABI location for the operand and not depend on whatever
6533 the middle-end decides to do with these vector types.
6535 The middle-end can't deal with vector types larger than 16 bytes. In this
6536 case, we return the original mode and warn ABI change if CUM isn't
6537 NULL.
6539 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6540 available for the function return value. */
6542 static machine_mode
6543 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6544 bool in_return)
6546 machine_mode mode = TYPE_MODE (type);
6548 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6550 HOST_WIDE_INT size = int_size_in_bytes (type);
6551 if ((size == 8 || size == 16 || size == 32 || size == 64)
6552 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6553 && TYPE_VECTOR_SUBPARTS (type) > 1)
6555 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6557 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6558 mode = MIN_MODE_VECTOR_FLOAT;
6559 else
6560 mode = MIN_MODE_VECTOR_INT;
6562 /* Get the mode which has this inner mode and number of units. */
6563 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6564 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6565 && GET_MODE_INNER (mode) == innermode)
6567 if (size == 64 && !TARGET_AVX512F)
6569 static bool warnedavx512f;
6570 static bool warnedavx512f_ret;
6572 if (cum && cum->warn_avx512f && !warnedavx512f)
6574 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6575 "without AVX512F enabled changes the ABI"))
6576 warnedavx512f = true;
6578 else if (in_return && !warnedavx512f_ret)
6580 if (warning (OPT_Wpsabi, "AVX512F vector return "
6581 "without AVX512F enabled changes the ABI"))
6582 warnedavx512f_ret = true;
6585 return TYPE_MODE (type);
6587 else if (size == 32 && !TARGET_AVX)
6589 static bool warnedavx;
6590 static bool warnedavx_ret;
6592 if (cum && cum->warn_avx && !warnedavx)
6594 if (warning (OPT_Wpsabi, "AVX vector argument "
6595 "without AVX enabled changes the ABI"))
6596 warnedavx = true;
6598 else if (in_return && !warnedavx_ret)
6600 if (warning (OPT_Wpsabi, "AVX vector return "
6601 "without AVX enabled changes the ABI"))
6602 warnedavx_ret = true;
6605 return TYPE_MODE (type);
6607 else if (((size == 8 && TARGET_64BIT) || size == 16)
6608 && !TARGET_SSE)
6610 static bool warnedsse;
6611 static bool warnedsse_ret;
6613 if (cum && cum->warn_sse && !warnedsse)
6615 if (warning (OPT_Wpsabi, "SSE vector argument "
6616 "without SSE enabled changes the ABI"))
6617 warnedsse = true;
6619 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6621 if (warning (OPT_Wpsabi, "SSE vector return "
6622 "without SSE enabled changes the ABI"))
6623 warnedsse_ret = true;
6626 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6628 static bool warnedmmx;
6629 static bool warnedmmx_ret;
6631 if (cum && cum->warn_mmx && !warnedmmx)
6633 if (warning (OPT_Wpsabi, "MMX vector argument "
6634 "without MMX enabled changes the ABI"))
6635 warnedmmx = true;
6637 else if (in_return && !warnedmmx_ret)
6639 if (warning (OPT_Wpsabi, "MMX vector return "
6640 "without MMX enabled changes the ABI"))
6641 warnedmmx_ret = true;
6644 return mode;
6647 gcc_unreachable ();
6651 return mode;
6654 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6655 this may not agree with the mode that the type system has chosen for the
6656 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6657 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6659 static rtx
6660 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6661 unsigned int regno)
6663 rtx tmp;
6665 if (orig_mode != BLKmode)
6666 tmp = gen_rtx_REG (orig_mode, regno);
6667 else
6669 tmp = gen_rtx_REG (mode, regno);
6670 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6671 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6674 return tmp;
6677 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
6678 The goal of this code is to classify each 8 bytes of the incoming argument
6679 by register class and assign registers accordingly. */
6681 /* Return the union class of CLASS1 and CLASS2.
6682 See the x86-64 PS ABI for details. */
6684 static enum x86_64_reg_class
6685 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6687 /* Rule #1: If both classes are equal, this is the resulting class. */
6688 if (class1 == class2)
6689 return class1;
6691 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6692 the other class. */
6693 if (class1 == X86_64_NO_CLASS)
6694 return class2;
6695 if (class2 == X86_64_NO_CLASS)
6696 return class1;
6698 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6699 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6700 return X86_64_MEMORY_CLASS;
6702 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6703 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6704 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6705 return X86_64_INTEGERSI_CLASS;
6706 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6707 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6708 return X86_64_INTEGER_CLASS;
6710 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6711 MEMORY is used. */
6712 if (class1 == X86_64_X87_CLASS
6713 || class1 == X86_64_X87UP_CLASS
6714 || class1 == X86_64_COMPLEX_X87_CLASS
6715 || class2 == X86_64_X87_CLASS
6716 || class2 == X86_64_X87UP_CLASS
6717 || class2 == X86_64_COMPLEX_X87_CLASS)
6718 return X86_64_MEMORY_CLASS;
6720 /* Rule #6: Otherwise class SSE is used. */
6721 return X86_64_SSE_CLASS;
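/* A small worked example of the merge rules above (a sketch):

     union u { int i; float f; };

   The single eightbyte is classified X86_64_INTEGERSI_CLASS for I and
   X86_64_SSESF_CLASS for F; rule #4 merges these to
   X86_64_INTEGERSI_CLASS, so the union is passed in a general-purpose
   register.  Replace F with a long double and rule #5 applies instead,
   forcing the union into memory.  */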
6724 /* Classify the argument of type TYPE and mode MODE.
6725 CLASSES will be filled by the register class used to pass each word
6726 of the operand. The number of words is returned. In case the parameter
6727 should be passed in memory, 0 is returned. As a special case for zero
6728 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6730 BIT_OFFSET is used internally for handling records and specifies the
6731 offset in bits modulo 512, to avoid overflow cases.
6733 See the x86-64 PS ABI for details.
6736 static int
6737 classify_argument (machine_mode mode, const_tree type,
6738 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6740 HOST_WIDE_INT bytes =
6741 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6742 int words
6743 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6745 /* Variable sized entities are always passed/returned in memory. */
6746 if (bytes < 0)
6747 return 0;
6749 if (mode != VOIDmode
6750 && targetm.calls.must_pass_in_stack (mode, type))
6751 return 0;
6753 if (type && AGGREGATE_TYPE_P (type))
6755 int i;
6756 tree field;
6757 enum x86_64_reg_class subclasses[MAX_CLASSES];
6759 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6760 if (bytes > 64)
6761 return 0;
6763 for (i = 0; i < words; i++)
6764 classes[i] = X86_64_NO_CLASS;
6766 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6767 signal the memory class, so handle this as a special case. */
6768 if (!words)
6770 classes[0] = X86_64_NO_CLASS;
6771 return 1;
6774 /* Classify each field of record and merge classes. */
6775 switch (TREE_CODE (type))
6777 case RECORD_TYPE:
6778 /* And now merge the fields of structure. */
6779 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6781 if (TREE_CODE (field) == FIELD_DECL)
6783 int num;
6785 if (TREE_TYPE (field) == error_mark_node)
6786 continue;
6788 /* Bitfields are always classified as integer. Handle them
6789 early, since later code would consider them to be
6790 misaligned integers. */
6791 if (DECL_BIT_FIELD (field))
6793 for (i = (int_bit_position (field)
6794 + (bit_offset % 64)) / 8 / 8;
6795 i < ((int_bit_position (field) + (bit_offset % 64))
6796 + tree_to_shwi (DECL_SIZE (field))
6797 + 63) / 8 / 8; i++)
6798 classes[i] =
6799 merge_classes (X86_64_INTEGER_CLASS,
6800 classes[i]);
6802 else
6804 int pos;
6806 type = TREE_TYPE (field);
6808 /* Flexible array member is ignored. */
6809 if (TYPE_MODE (type) == BLKmode
6810 && TREE_CODE (type) == ARRAY_TYPE
6811 && TYPE_SIZE (type) == NULL_TREE
6812 && TYPE_DOMAIN (type) != NULL_TREE
6813 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6814 == NULL_TREE))
6816 static bool warned;
6818 if (!warned && warn_psabi)
6820 warned = true;
6821 inform (input_location,
6822 "the ABI of passing struct with"
6823 " a flexible array member has"
6824 " changed in GCC 4.4");
6826 continue;
6828 num = classify_argument (TYPE_MODE (type), type,
6829 subclasses,
6830 (int_bit_position (field)
6831 + bit_offset) % 512);
6832 if (!num)
6833 return 0;
6834 pos = (int_bit_position (field)
6835 + (bit_offset % 64)) / 8 / 8;
6836 for (i = 0; i < num && (i + pos) < words; i++)
6837 classes[i + pos] =
6838 merge_classes (subclasses[i], classes[i + pos]);
6842 break;
6844 case ARRAY_TYPE:
6845 /* Arrays are handled as small records. */
6847 int num;
6848 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6849 TREE_TYPE (type), subclasses, bit_offset);
6850 if (!num)
6851 return 0;
6853 /* The partial classes are now full classes. */
6854 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6855 subclasses[0] = X86_64_SSE_CLASS;
6856 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6857 && !((bit_offset % 64) == 0 && bytes == 4))
6858 subclasses[0] = X86_64_INTEGER_CLASS;
6860 for (i = 0; i < words; i++)
6861 classes[i] = subclasses[i % num];
6863 break;
6865 case UNION_TYPE:
6866 case QUAL_UNION_TYPE:
6867 /* Unions are similar to RECORD_TYPE but offset is always 0.
6869 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6871 if (TREE_CODE (field) == FIELD_DECL)
6873 int num;
6875 if (TREE_TYPE (field) == error_mark_node)
6876 continue;
6878 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6879 TREE_TYPE (field), subclasses,
6880 bit_offset);
6881 if (!num)
6882 return 0;
6883 for (i = 0; i < num && i < words; i++)
6884 classes[i] = merge_classes (subclasses[i], classes[i]);
6887 break;
6889 default:
6890 gcc_unreachable ();
6893 if (words > 2)
6895 /* When the size is > 16 bytes, if the first class isn't
6896 X86_64_SSE_CLASS or any of the other classes isn't
6897 X86_64_SSEUP_CLASS, everything should be passed in
6898 memory. */
6899 if (classes[0] != X86_64_SSE_CLASS)
6900 return 0;
6902 for (i = 1; i < words; i++)
6903 if (classes[i] != X86_64_SSEUP_CLASS)
6904 return 0;
6907 /* Final merger cleanup. */
6908 for (i = 0; i < words; i++)
6910 /* If one class is MEMORY, everything should be passed in
6911 memory. */
6912 if (classes[i] == X86_64_MEMORY_CLASS)
6913 return 0;
6915 /* The X86_64_SSEUP_CLASS should always be preceded by
6916 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6917 if (classes[i] == X86_64_SSEUP_CLASS
6918 && classes[i - 1] != X86_64_SSE_CLASS
6919 && classes[i - 1] != X86_64_SSEUP_CLASS)
6921 /* The first one should never be X86_64_SSEUP_CLASS. */
6922 gcc_assert (i != 0);
6923 classes[i] = X86_64_SSE_CLASS;
6926 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6927 everything should be passed in memory. */
6928 if (classes[i] == X86_64_X87UP_CLASS
6929 && (classes[i - 1] != X86_64_X87_CLASS))
6931 static bool warned;
6933 /* The first one should never be X86_64_X87UP_CLASS. */
6934 gcc_assert (i != 0);
6935 if (!warned && warn_psabi)
6937 warned = true;
6938 inform (input_location,
6939 "the ABI of passing union with long double"
6940 " has changed in GCC 4.4");
6942 return 0;
6945 return words;
6948 /* Compute the alignment needed. We align all types to their natural boundaries,
6949 with the exception of XFmode, which is aligned to 64 bits. */
6950 if (mode != VOIDmode && mode != BLKmode)
6952 int mode_alignment = GET_MODE_BITSIZE (mode);
6954 if (mode == XFmode)
6955 mode_alignment = 128;
6956 else if (mode == XCmode)
6957 mode_alignment = 256;
6958 if (COMPLEX_MODE_P (mode))
6959 mode_alignment /= 2;
6960 /* Misaligned fields are always returned in memory. */
6961 if (bit_offset % mode_alignment)
6962 return 0;
6965 /* For V1xx modes, just use the base mode. */
6966 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6967 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6968 mode = GET_MODE_INNER (mode);
6970 /* Classification of atomic types. */
6971 switch (mode)
6973 case SDmode:
6974 case DDmode:
6975 classes[0] = X86_64_SSE_CLASS;
6976 return 1;
6977 case TDmode:
6978 classes[0] = X86_64_SSE_CLASS;
6979 classes[1] = X86_64_SSEUP_CLASS;
6980 return 2;
6981 case DImode:
6982 case SImode:
6983 case HImode:
6984 case QImode:
6985 case CSImode:
6986 case CHImode:
6987 case CQImode:
6989 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6991 /* Analyze last 128 bits only. */
6992 size = (size - 1) & 0x7f;
6994 if (size < 32)
6996 classes[0] = X86_64_INTEGERSI_CLASS;
6997 return 1;
6999 else if (size < 64)
7001 classes[0] = X86_64_INTEGER_CLASS;
7002 return 1;
7004 else if (size < 64+32)
7006 classes[0] = X86_64_INTEGER_CLASS;
7007 classes[1] = X86_64_INTEGERSI_CLASS;
7008 return 2;
7010 else if (size < 64+64)
7012 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7013 return 2;
7015 else
7016 gcc_unreachable ();
7018 case CDImode:
7019 case TImode:
7020 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7021 return 2;
7022 case COImode:
7023 case OImode:
7024 /* OImode shouldn't be used directly. */
7025 gcc_unreachable ();
7026 case CTImode:
7027 return 0;
7028 case SFmode:
7029 if (!(bit_offset % 64))
7030 classes[0] = X86_64_SSESF_CLASS;
7031 else
7032 classes[0] = X86_64_SSE_CLASS;
7033 return 1;
7034 case DFmode:
7035 classes[0] = X86_64_SSEDF_CLASS;
7036 return 1;
7037 case XFmode:
7038 classes[0] = X86_64_X87_CLASS;
7039 classes[1] = X86_64_X87UP_CLASS;
7040 return 2;
7041 case TFmode:
7042 classes[0] = X86_64_SSE_CLASS;
7043 classes[1] = X86_64_SSEUP_CLASS;
7044 return 2;
7045 case SCmode:
7046 classes[0] = X86_64_SSE_CLASS;
7047 if (!(bit_offset % 64))
7048 return 1;
7049 else
7051 static bool warned;
7053 if (!warned && warn_psabi)
7055 warned = true;
7056 inform (input_location,
7057 "the ABI of passing structure with complex float"
7058 " member has changed in GCC 4.4");
7060 classes[1] = X86_64_SSESF_CLASS;
7061 return 2;
7063 case DCmode:
7064 classes[0] = X86_64_SSEDF_CLASS;
7065 classes[1] = X86_64_SSEDF_CLASS;
7066 return 2;
7067 case XCmode:
7068 classes[0] = X86_64_COMPLEX_X87_CLASS;
7069 return 1;
7070 case TCmode:
7071 /* This mode is larger than 16 bytes. */
7072 return 0;
7073 case V8SFmode:
7074 case V8SImode:
7075 case V32QImode:
7076 case V16HImode:
7077 case V4DFmode:
7078 case V4DImode:
7079 classes[0] = X86_64_SSE_CLASS;
7080 classes[1] = X86_64_SSEUP_CLASS;
7081 classes[2] = X86_64_SSEUP_CLASS;
7082 classes[3] = X86_64_SSEUP_CLASS;
7083 return 4;
7084 case V8DFmode:
7085 case V16SFmode:
7086 case V8DImode:
7087 case V16SImode:
7088 case V32HImode:
7089 case V64QImode:
7090 classes[0] = X86_64_SSE_CLASS;
7091 classes[1] = X86_64_SSEUP_CLASS;
7092 classes[2] = X86_64_SSEUP_CLASS;
7093 classes[3] = X86_64_SSEUP_CLASS;
7094 classes[4] = X86_64_SSEUP_CLASS;
7095 classes[5] = X86_64_SSEUP_CLASS;
7096 classes[6] = X86_64_SSEUP_CLASS;
7097 classes[7] = X86_64_SSEUP_CLASS;
7098 return 8;
7099 case V4SFmode:
7100 case V4SImode:
7101 case V16QImode:
7102 case V8HImode:
7103 case V2DFmode:
7104 case V2DImode:
7105 classes[0] = X86_64_SSE_CLASS;
7106 classes[1] = X86_64_SSEUP_CLASS;
7107 return 2;
7108 case V1TImode:
7109 case V1DImode:
7110 case V2SFmode:
7111 case V2SImode:
7112 case V4HImode:
7113 case V8QImode:
7114 classes[0] = X86_64_SSE_CLASS;
7115 return 1;
7116 case BLKmode:
7117 case VOIDmode:
7118 return 0;
7119 default:
7120 gcc_assert (VECTOR_MODE_P (mode));
7122 if (bytes > 16)
7123 return 0;
7125 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7127 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7128 classes[0] = X86_64_INTEGERSI_CLASS;
7129 else
7130 classes[0] = X86_64_INTEGER_CLASS;
7131 classes[1] = X86_64_INTEGER_CLASS;
7132 return 1 + (bytes > 8);
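/* A worked example for classify_argument (a sketch):

     struct s { long x; double d; };

   is 16 bytes, i.e. two eightbytes.  The first eightbyte holds X and is
   classified X86_64_INTEGER_CLASS; the second holds D and is classified
   X86_64_SSEDF_CLASS.  classify_argument therefore returns 2 with
   classes = { INTEGER, SSEDF }, and the struct travels in one integer
   and one SSE register.  */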
7136 /* Examine the argument and set the number of registers required in each
7137 class. Return true iff the parameter should be passed in memory. */
7139 static bool
7140 examine_argument (machine_mode mode, const_tree type, int in_return,
7141 int *int_nregs, int *sse_nregs)
7143 enum x86_64_reg_class regclass[MAX_CLASSES];
7144 int n = classify_argument (mode, type, regclass, 0);
7146 *int_nregs = 0;
7147 *sse_nregs = 0;
7149 if (!n)
7150 return true;
7151 for (n--; n >= 0; n--)
7152 switch (regclass[n])
7154 case X86_64_INTEGER_CLASS:
7155 case X86_64_INTEGERSI_CLASS:
7156 (*int_nregs)++;
7157 break;
7158 case X86_64_SSE_CLASS:
7159 case X86_64_SSESF_CLASS:
7160 case X86_64_SSEDF_CLASS:
7161 (*sse_nregs)++;
7162 break;
7163 case X86_64_NO_CLASS:
7164 case X86_64_SSEUP_CLASS:
7165 break;
7166 case X86_64_X87_CLASS:
7167 case X86_64_X87UP_CLASS:
7168 case X86_64_COMPLEX_X87_CLASS:
7169 if (!in_return)
7170 return true;
7171 break;
7172 case X86_64_MEMORY_CLASS:
7173 gcc_unreachable ();
7176 return false;
7179 /* Construct container for the argument used by GCC interface. See
7180 FUNCTION_ARG for the detailed description. */
7182 static rtx
7183 construct_container (machine_mode mode, machine_mode orig_mode,
7184 const_tree type, int in_return, int nintregs, int nsseregs,
7185 const int *intreg, int sse_regno)
7187 /* The following variables hold the static issued_error state. */
7188 static bool issued_sse_arg_error;
7189 static bool issued_sse_ret_error;
7190 static bool issued_x87_ret_error;
7192 machine_mode tmpmode;
7193 int bytes =
7194 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7195 enum x86_64_reg_class regclass[MAX_CLASSES];
7196 int n;
7197 int i;
7198 int nexps = 0;
7199 int needed_sseregs, needed_intregs;
7200 rtx exp[MAX_CLASSES];
7201 rtx ret;
7203 n = classify_argument (mode, type, regclass, 0);
7204 if (!n)
7205 return NULL;
7206 if (examine_argument (mode, type, in_return, &needed_intregs,
7207 &needed_sseregs))
7208 return NULL;
7209 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7210 return NULL;
7212 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7213 some less clueful developer tries to use floating-point anyway. */
7214 if (needed_sseregs && !TARGET_SSE)
7216 if (in_return)
7218 if (!issued_sse_ret_error)
7220 error ("SSE register return with SSE disabled");
7221 issued_sse_ret_error = true;
7224 else if (!issued_sse_arg_error)
7226 error ("SSE register argument with SSE disabled");
7227 issued_sse_arg_error = true;
7229 return NULL;
7232 /* Likewise, error if the ABI requires us to return values in the
7233 x87 registers and the user specified -mno-80387. */
7234 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7235 for (i = 0; i < n; i++)
7236 if (regclass[i] == X86_64_X87_CLASS
7237 || regclass[i] == X86_64_X87UP_CLASS
7238 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7240 if (!issued_x87_ret_error)
7242 error ("x87 register return with x87 disabled");
7243 issued_x87_ret_error = true;
7245 return NULL;
7248 /* First construct the simple cases. Avoid SCmode, since we want to use
7249 a single register to pass this type. */
7250 if (n == 1 && mode != SCmode)
7251 switch (regclass[0])
7253 case X86_64_INTEGER_CLASS:
7254 case X86_64_INTEGERSI_CLASS:
7255 return gen_rtx_REG (mode, intreg[0]);
7256 case X86_64_SSE_CLASS:
7257 case X86_64_SSESF_CLASS:
7258 case X86_64_SSEDF_CLASS:
7259 if (mode != BLKmode)
7260 return gen_reg_or_parallel (mode, orig_mode,
7261 SSE_REGNO (sse_regno));
7262 break;
7263 case X86_64_X87_CLASS:
7264 case X86_64_COMPLEX_X87_CLASS:
7265 return gen_rtx_REG (mode, FIRST_STACK_REG);
7266 case X86_64_NO_CLASS:
7267 /* Zero sized array, struct or class. */
7268 return NULL;
7269 default:
7270 gcc_unreachable ();
7272 if (n == 2
7273 && regclass[0] == X86_64_SSE_CLASS
7274 && regclass[1] == X86_64_SSEUP_CLASS
7275 && mode != BLKmode)
7276 return gen_reg_or_parallel (mode, orig_mode,
7277 SSE_REGNO (sse_regno));
7278 if (n == 4
7279 && regclass[0] == X86_64_SSE_CLASS
7280 && regclass[1] == X86_64_SSEUP_CLASS
7281 && regclass[2] == X86_64_SSEUP_CLASS
7282 && regclass[3] == X86_64_SSEUP_CLASS
7283 && mode != BLKmode)
7284 return gen_reg_or_parallel (mode, orig_mode,
7285 SSE_REGNO (sse_regno));
7286 if (n == 8
7287 && regclass[0] == X86_64_SSE_CLASS
7288 && regclass[1] == X86_64_SSEUP_CLASS
7289 && regclass[2] == X86_64_SSEUP_CLASS
7290 && regclass[3] == X86_64_SSEUP_CLASS
7291 && regclass[4] == X86_64_SSEUP_CLASS
7292 && regclass[5] == X86_64_SSEUP_CLASS
7293 && regclass[6] == X86_64_SSEUP_CLASS
7294 && regclass[7] == X86_64_SSEUP_CLASS
7295 && mode != BLKmode)
7296 return gen_reg_or_parallel (mode, orig_mode,
7297 SSE_REGNO (sse_regno));
7298 if (n == 2
7299 && regclass[0] == X86_64_X87_CLASS
7300 && regclass[1] == X86_64_X87UP_CLASS)
7301 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7303 if (n == 2
7304 && regclass[0] == X86_64_INTEGER_CLASS
7305 && regclass[1] == X86_64_INTEGER_CLASS
7306 && (mode == CDImode || mode == TImode)
7307 && intreg[0] + 1 == intreg[1])
7308 return gen_rtx_REG (mode, intreg[0]);
7310 /* Otherwise figure out the entries of the PARALLEL. */
7311 for (i = 0; i < n; i++)
7313 int pos;
7315 switch (regclass[i])
7317 case X86_64_NO_CLASS:
7318 break;
7319 case X86_64_INTEGER_CLASS:
7320 case X86_64_INTEGERSI_CLASS:
7321 /* Merge TImodes on aligned occasions here too. */
7322 if (i * 8 + 8 > bytes)
7323 tmpmode
7324 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7325 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7326 tmpmode = SImode;
7327 else
7328 tmpmode = DImode;
7329 /* We've requested 24 bytes for which we
7330 don't have a mode. Use DImode. */
7331 if (tmpmode == BLKmode)
7332 tmpmode = DImode;
7333 exp [nexps++]
7334 = gen_rtx_EXPR_LIST (VOIDmode,
7335 gen_rtx_REG (tmpmode, *intreg),
7336 GEN_INT (i*8));
7337 intreg++;
7338 break;
7339 case X86_64_SSESF_CLASS:
7340 exp [nexps++]
7341 = gen_rtx_EXPR_LIST (VOIDmode,
7342 gen_rtx_REG (SFmode,
7343 SSE_REGNO (sse_regno)),
7344 GEN_INT (i*8));
7345 sse_regno++;
7346 break;
7347 case X86_64_SSEDF_CLASS:
7348 exp [nexps++]
7349 = gen_rtx_EXPR_LIST (VOIDmode,
7350 gen_rtx_REG (DFmode,
7351 SSE_REGNO (sse_regno)),
7352 GEN_INT (i*8));
7353 sse_regno++;
7354 break;
7355 case X86_64_SSE_CLASS:
7356 pos = i;
7357 switch (n)
7359 case 1:
7360 tmpmode = DImode;
7361 break;
7362 case 2:
7363 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7365 tmpmode = TImode;
7366 i++;
7368 else
7369 tmpmode = DImode;
7370 break;
7371 case 4:
7372 gcc_assert (i == 0
7373 && regclass[1] == X86_64_SSEUP_CLASS
7374 && regclass[2] == X86_64_SSEUP_CLASS
7375 && regclass[3] == X86_64_SSEUP_CLASS);
7376 tmpmode = OImode;
7377 i += 3;
7378 break;
7379 case 8:
7380 gcc_assert (i == 0
7381 && regclass[1] == X86_64_SSEUP_CLASS
7382 && regclass[2] == X86_64_SSEUP_CLASS
7383 && regclass[3] == X86_64_SSEUP_CLASS
7384 && regclass[4] == X86_64_SSEUP_CLASS
7385 && regclass[5] == X86_64_SSEUP_CLASS
7386 && regclass[6] == X86_64_SSEUP_CLASS
7387 && regclass[7] == X86_64_SSEUP_CLASS);
7388 tmpmode = XImode;
7389 i += 7;
7390 break;
7391 default:
7392 gcc_unreachable ();
7394 exp [nexps++]
7395 = gen_rtx_EXPR_LIST (VOIDmode,
7396 gen_rtx_REG (tmpmode,
7397 SSE_REGNO (sse_regno)),
7398 GEN_INT (pos*8));
7399 sse_regno++;
7400 break;
7401 default:
7402 gcc_unreachable ();
7406 /* Empty aligned struct, union or class. */
7407 if (nexps == 0)
7408 return NULL;
7410 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7411 for (i = 0; i < nexps; i++)
7412 XVECEXP (ret, 0, i) = exp [i];
7413 return ret;
7416 /* Update the data in CUM to advance over an argument of mode MODE
7417 and data type TYPE. (TYPE is null for libcalls where that information
7418 may not be available.)
7420 Return the number of integer registers advanced over. */
7422 static int
7423 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7424 const_tree type, HOST_WIDE_INT bytes,
7425 HOST_WIDE_INT words)
7427 int res = 0;
7429 switch (mode)
7431 default:
7432 break;
7434 case BLKmode:
7435 if (bytes < 0)
7436 break;
7437 /* FALLTHRU */
7439 case DImode:
7440 case SImode:
7441 case HImode:
7442 case QImode:
7443 cum->words += words;
7444 cum->nregs -= words;
7445 cum->regno += words;
7446 if (cum->nregs >= 0)
7447 res = words;
7448 if (cum->nregs <= 0)
7450 cum->nregs = 0;
7451 cum->regno = 0;
7453 break;
7455 case OImode:
7456 /* OImode shouldn't be used directly. */
7457 gcc_unreachable ();
7459 case DFmode:
7460 if (cum->float_in_sse < 2)
7461 break;
7462 case SFmode:
7463 if (cum->float_in_sse < 1)
7464 break;
7465 /* FALLTHRU */
7467 case V8SFmode:
7468 case V8SImode:
7469 case V64QImode:
7470 case V32HImode:
7471 case V16SImode:
7472 case V8DImode:
7473 case V16SFmode:
7474 case V8DFmode:
7475 case V32QImode:
7476 case V16HImode:
7477 case V4DFmode:
7478 case V4DImode:
7479 case TImode:
7480 case V16QImode:
7481 case V8HImode:
7482 case V4SImode:
7483 case V2DImode:
7484 case V4SFmode:
7485 case V2DFmode:
7486 if (!type || !AGGREGATE_TYPE_P (type))
7488 cum->sse_words += words;
7489 cum->sse_nregs -= 1;
7490 cum->sse_regno += 1;
7491 if (cum->sse_nregs <= 0)
7493 cum->sse_nregs = 0;
7494 cum->sse_regno = 0;
7497 break;
7499 case V8QImode:
7500 case V4HImode:
7501 case V2SImode:
7502 case V2SFmode:
7503 case V1TImode:
7504 case V1DImode:
7505 if (!type || !AGGREGATE_TYPE_P (type))
7507 cum->mmx_words += words;
7508 cum->mmx_nregs -= 1;
7509 cum->mmx_regno += 1;
7510 if (cum->mmx_nregs <= 0)
7512 cum->mmx_nregs = 0;
7513 cum->mmx_regno = 0;
7516 break;
7519 return res;
7522 static int
7523 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7524 const_tree type, HOST_WIDE_INT words, bool named)
7526 int int_nregs, sse_nregs;
7528 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7529 if (!named && (VALID_AVX512F_REG_MODE (mode)
7530 || VALID_AVX256_REG_MODE (mode)))
7531 return 0;
7533 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7534 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7536 cum->nregs -= int_nregs;
7537 cum->sse_nregs -= sse_nregs;
7538 cum->regno += int_nregs;
7539 cum->sse_regno += sse_nregs;
7540 return int_nregs;
7542 else
7544 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7545 cum->words = (cum->words + align - 1) & ~(align - 1);
7546 cum->words += words;
7547 return 0;
7551 static int
7552 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7553 HOST_WIDE_INT words)
7555 /* Otherwise, this should be passed indirect. */
7556 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7558 cum->words += words;
7559 if (cum->nregs > 0)
7561 cum->nregs -= 1;
7562 cum->regno += 1;
7563 return 1;
7565 return 0;
7568 /* Update the data in CUM to advance over an argument of mode MODE and
7569 data type TYPE. (TYPE is null for libcalls where that information
7570 may not be available.) */
7572 static void
7573 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7574 const_tree type, bool named)
7576 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7577 HOST_WIDE_INT bytes, words;
7578 int nregs;
7580 if (mode == BLKmode)
7581 bytes = int_size_in_bytes (type);
7582 else
7583 bytes = GET_MODE_SIZE (mode);
7584 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7586 if (type)
7587 mode = type_natural_mode (type, NULL, false);
7589 if ((type && POINTER_BOUNDS_TYPE_P (type))
7590 || POINTER_BOUNDS_MODE_P (mode))
7592 /* If we pass bounds in the BT, just update the remaining bounds count. */
7593 if (cum->bnds_in_bt)
7595 cum->bnds_in_bt--;
7596 return;
7599 /* Update the remaining number of bounds to force. */
7600 if (cum->force_bnd_pass)
7601 cum->force_bnd_pass--;
7603 cum->bnd_regno++;
7605 return;
7608 /* The first arg not going to Bounds Tables resets this counter. */
7609 cum->bnds_in_bt = 0;
7610 /* For unnamed args we always pass bounds to avoid a bounds mismatch when
7611 the passed and received types do not match. If bounds do not follow an
7612 unnamed arg, still pretend the required number of bounds were passed. */
7613 if (cum->force_bnd_pass)
7615 cum->bnd_regno += cum->force_bnd_pass;
7616 cum->force_bnd_pass = 0;
7619 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7620 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7621 else if (TARGET_64BIT)
7622 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7623 else
7624 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7626 /* For stdarg we expect bounds to be passed for each value passed
7627 in register. */
7628 if (cum->stdarg)
7629 cum->force_bnd_pass = nregs;
7630 /* For pointers passed in memory we expect bounds passed in Bounds
7631 Table. */
7632 if (!nregs)
7633 cum->bnds_in_bt = chkp_type_bounds_count (type);
7636 /* Define where to put the arguments to a function.
7637 Value is zero to push the argument on the stack,
7638 or a hard register in which to store the argument.
7640 MODE is the argument's machine mode.
7641 TYPE is the data type of the argument (as a tree).
7642 This is null for libcalls where that information may
7643 not be available.
7644 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7645 the preceding args and about the function being called.
7646 NAMED is nonzero if this argument is a named parameter
7647 (otherwise it is an extra parameter matching an ellipsis). */
7649 static rtx
7650 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7651 machine_mode orig_mode, const_tree type,
7652 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7654 /* Avoid the AL settings for the Unix64 ABI. */
7655 if (mode == VOIDmode)
7656 return constm1_rtx;
7658 switch (mode)
7660 default:
7661 break;
7663 case BLKmode:
7664 if (bytes < 0)
7665 break;
7666 /* FALLTHRU */
7667 case DImode:
7668 case SImode:
7669 case HImode:
7670 case QImode:
7671 if (words <= cum->nregs)
7673 int regno = cum->regno;
7675 /* Fastcall allocates the first two DWORD (SImode) or
7676 smaller arguments to ECX and EDX if it isn't an
7677 aggregate type. */
7678 if (cum->fastcall)
7680 if (mode == BLKmode
7681 || mode == DImode
7682 || (type && AGGREGATE_TYPE_P (type)))
7683 break;
7685 /* ECX, not EAX, is the first allocated register. */
7686 if (regno == AX_REG)
7687 regno = CX_REG;
7689 return gen_rtx_REG (mode, regno);
7691 break;
7693 case DFmode:
7694 if (cum->float_in_sse < 2)
7695 break;
7696 case SFmode:
7697 if (cum->float_in_sse < 1)
7698 break;
7699 /* FALLTHRU */
7700 case TImode:
7701 /* In 32bit, we pass TImode in xmm registers. */
7702 case V16QImode:
7703 case V8HImode:
7704 case V4SImode:
7705 case V2DImode:
7706 case V4SFmode:
7707 case V2DFmode:
7708 if (!type || !AGGREGATE_TYPE_P (type))
7710 if (cum->sse_nregs)
7711 return gen_reg_or_parallel (mode, orig_mode,
7712 cum->sse_regno + FIRST_SSE_REG);
7714 break;
7716 case OImode:
7717 case XImode:
7718 /* OImode and XImode shouldn't be used directly. */
7719 gcc_unreachable ();
7721 case V64QImode:
7722 case V32HImode:
7723 case V16SImode:
7724 case V8DImode:
7725 case V16SFmode:
7726 case V8DFmode:
7727 case V8SFmode:
7728 case V8SImode:
7729 case V32QImode:
7730 case V16HImode:
7731 case V4DFmode:
7732 case V4DImode:
7733 if (!type || !AGGREGATE_TYPE_P (type))
7735 if (cum->sse_nregs)
7736 return gen_reg_or_parallel (mode, orig_mode,
7737 cum->sse_regno + FIRST_SSE_REG);
7739 break;
7741 case V8QImode:
7742 case V4HImode:
7743 case V2SImode:
7744 case V2SFmode:
7745 case V1TImode:
7746 case V1DImode:
7747 if (!type || !AGGREGATE_TYPE_P (type))
7749 if (cum->mmx_nregs)
7750 return gen_reg_or_parallel (mode, orig_mode,
7751 cum->mmx_regno + FIRST_MMX_REG);
7753 break;
7756 return NULL_RTX;
7759 static rtx
7760 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7761 machine_mode orig_mode, const_tree type, bool named)
7763 /* Handle a hidden AL argument containing number of registers
7764 for varargs x86-64 functions. */
7765 if (mode == VOIDmode)
7766 return GEN_INT (cum->maybe_vaarg
7767 ? (cum->sse_nregs < 0
7768 ? X86_64_SSE_REGPARM_MAX
7769 : cum->sse_regno)
7770 : -1);
7772 switch (mode)
7774 default:
7775 break;
7777 case V8SFmode:
7778 case V8SImode:
7779 case V32QImode:
7780 case V16HImode:
7781 case V4DFmode:
7782 case V4DImode:
7783 case V16SFmode:
7784 case V16SImode:
7785 case V64QImode:
7786 case V32HImode:
7787 case V8DFmode:
7788 case V8DImode:
7789 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7790 if (!named)
7791 return NULL;
7792 break;
7795 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7796 cum->sse_nregs,
7797 &x86_64_int_parameter_registers [cum->regno],
7798 cum->sse_regno);
7801 static rtx
7802 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7803 machine_mode orig_mode, bool named,
7804 HOST_WIDE_INT bytes)
7806 unsigned int regno;
7808 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7809 We use the value -2 to specify that the current function call is MS ABI. */
7810 if (mode == VOIDmode)
7811 return GEN_INT (-2);
7813 /* If we've run out of registers, it goes on the stack. */
7814 if (cum->nregs == 0)
7815 return NULL_RTX;
7817 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7819 /* Only floating point modes are passed in anything but integer regs. */
7820 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7822 if (named)
7823 regno = cum->regno + FIRST_SSE_REG;
7824 else
7826 rtx t1, t2;
7828 /* Unnamed floating parameters are passed in both the
7829 SSE and integer registers. */
7830 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7831 t2 = gen_rtx_REG (mode, regno);
7832 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7833 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7834 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7837 /* Handle aggregate types passed in a register. */
7838 if (orig_mode == BLKmode)
7840 if (bytes > 0 && bytes <= 8)
7841 mode = (bytes > 4 ? DImode : SImode);
7842 if (mode == BLKmode)
7843 mode = DImode;
7846 return gen_reg_or_parallel (mode, orig_mode, regno);
7849 /* Return where to put the arguments to a function.
7850 Return zero to push the argument on the stack, or a hard register in
7851 which to store the argument.
7852 MODE is the argument's machine mode. TYPE is the data type of the
7853 argument. It is null for libcalls where that information may not be
7854 available. CUM gives information about the preceding args and about
7855 the function being called. NAMED is nonzero if this argument is a
7856 named parameter (otherwise it is an extra parameter matching an
7857 ellipsis). */
7859 static rtx
7860 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7861 const_tree type, bool named)
7863 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7864 machine_mode mode = omode;
7865 HOST_WIDE_INT bytes, words;
7866 rtx arg;
7868 /* All pointer bounds arguments are handled separately here. */
7869 if ((type && POINTER_BOUNDS_TYPE_P (type))
7870 || POINTER_BOUNDS_MODE_P (mode))
7872 /* Return NULL if bounds are forced to go in Bounds Table. */
7873 if (cum->bnds_in_bt)
7874 arg = NULL;
7875 /* Return the next available bound reg if any. */
7876 else if (cum->bnd_regno <= LAST_BND_REG)
7877 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7878 /* Return the next special slot number otherwise. */
7879 else
7880 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7882 return arg;
7885 if (mode == BLKmode)
7886 bytes = int_size_in_bytes (type);
7887 else
7888 bytes = GET_MODE_SIZE (mode);
7889 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7891 /* To simplify the code below, represent vector types with a vector mode
7892 even if MMX/SSE are not active. */
7893 if (type && TREE_CODE (type) == VECTOR_TYPE)
7894 mode = type_natural_mode (type, cum, false);
7896 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7897 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7898 else if (TARGET_64BIT)
7899 arg = function_arg_64 (cum, mode, omode, type, named);
7900 else
7901 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7903 return arg;
7906 /* A C expression that indicates when an argument must be passed by
7907 reference. If nonzero for an argument, a copy of that argument is
7908 made in memory and a pointer to the argument is passed instead of
7909 the argument itself. The pointer is passed in whatever way is
7910 appropriate for passing a pointer to that type. */
7912 static bool
7913 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7914 const_tree type, bool)
7916 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7918 /* Bounds are never passed by reference. */
7919 if ((type && POINTER_BOUNDS_TYPE_P (type))
7920 || POINTER_BOUNDS_MODE_P (mode))
7921 return false;
7923 /* See Windows x64 Software Convention. */
7924 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7926 int msize = (int) GET_MODE_SIZE (mode);
7927 if (type)
7929 /* Arrays are passed by reference. */
7930 if (TREE_CODE (type) == ARRAY_TYPE)
7931 return true;
7933 if (AGGREGATE_TYPE_P (type))
7935 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7936 are passed by reference. */
7937 msize = int_size_in_bytes (type);
7941 /* __m128 is passed by reference. */
7942 switch (msize) {
7943 case 1: case 2: case 4: case 8:
7944 break;
7945 default:
7946 return true;
7949 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7950 return 1;
7952 return 0;
7955 /* Return true when TYPE should be 128bit aligned for 32bit argument
7956 passing ABI. XXX: This function is obsolete and is only used for
7957 checking psABI compatibility with previous versions of GCC. */
7959 static bool
7960 ix86_compat_aligned_value_p (const_tree type)
7962 machine_mode mode = TYPE_MODE (type);
7963 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7964 || mode == TDmode
7965 || mode == TFmode
7966 || mode == TCmode)
7967 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7968 return true;
7969 if (TYPE_ALIGN (type) < 128)
7970 return false;
7972 if (AGGREGATE_TYPE_P (type))
7974 /* Walk the aggregates recursively. */
7975 switch (TREE_CODE (type))
7977 case RECORD_TYPE:
7978 case UNION_TYPE:
7979 case QUAL_UNION_TYPE:
7981 tree field;
7983 /* Walk all the structure fields. */
7984 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7986 if (TREE_CODE (field) == FIELD_DECL
7987 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7988 return true;
7990 break;
7993 case ARRAY_TYPE:
7994 /* Just for use if some languages pass arrays by value. */
7995 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7996 return true;
7997 break;
7999 default:
8000 gcc_unreachable ();
8003 return false;
8006 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8007 XXX: This function is obsolete and is only used for checking psABI
8008 compatibility with previous versions of GCC. */
8010 static unsigned int
8011 ix86_compat_function_arg_boundary (machine_mode mode,
8012 const_tree type, unsigned int align)
8014 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8015 natural boundaries. */
8016 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8018 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8019 make an exception for SSE modes since these require 128bit
8020 alignment.
8022 The handling here differs from field_alignment. ICC aligns MMX
8023 arguments to 4 byte boundaries, while structure fields are aligned
8024 to 8 byte boundaries. */
8025 if (!type)
8027 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8028 align = PARM_BOUNDARY;
8030 else
8032 if (!ix86_compat_aligned_value_p (type))
8033 align = PARM_BOUNDARY;
8036 if (align > BIGGEST_ALIGNMENT)
8037 align = BIGGEST_ALIGNMENT;
8038 return align;
8041 /* Return true when TYPE should be 128bit aligned for 32bit argument
8042 passing ABI. */
8044 static bool
8045 ix86_contains_aligned_value_p (const_tree type)
8047 machine_mode mode = TYPE_MODE (type);
8049 if (mode == XFmode || mode == XCmode)
8050 return false;
8052 if (TYPE_ALIGN (type) < 128)
8053 return false;
8055 if (AGGREGATE_TYPE_P (type))
8057 /* Walk the aggregates recursively. */
8058 switch (TREE_CODE (type))
8060 case RECORD_TYPE:
8061 case UNION_TYPE:
8062 case QUAL_UNION_TYPE:
8064 tree field;
8066 /* Walk all the structure fields. */
8067 for (field = TYPE_FIELDS (type);
8068 field;
8069 field = DECL_CHAIN (field))
8071 if (TREE_CODE (field) == FIELD_DECL
8072 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8073 return true;
8075 break;
8078 case ARRAY_TYPE:
8079 /* Just for use if some languages pass arrays by value. */
8080 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8081 return true;
8082 break;
8084 default:
8085 gcc_unreachable ();
8088 else
8089 return TYPE_ALIGN (type) >= 128;
8091 return false;
8094 /* Gives the alignment boundary, in bits, of an argument with the
8095 specified mode and type. */
8097 static unsigned int
8098 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8100 unsigned int align;
8101 if (type)
8103 /* Since the main variant type is used for the call, convert the type to
8104 its main variant. */
8105 type = TYPE_MAIN_VARIANT (type);
8106 align = TYPE_ALIGN (type);
8108 else
8109 align = GET_MODE_ALIGNMENT (mode);
8110 if (align < PARM_BOUNDARY)
8111 align = PARM_BOUNDARY;
8112 else
8114 static bool warned;
8115 unsigned int saved_align = align;
8117 if (!TARGET_64BIT)
8119 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8120 if (!type)
8122 if (mode == XFmode || mode == XCmode)
8123 align = PARM_BOUNDARY;
8125 else if (!ix86_contains_aligned_value_p (type))
8126 align = PARM_BOUNDARY;
8128 if (align < 128)
8129 align = PARM_BOUNDARY;
8132 if (warn_psabi
8133 && !warned
8134 && align != ix86_compat_function_arg_boundary (mode, type,
8135 saved_align))
8137 warned = true;
8138 inform (input_location,
8139 "The ABI for passing parameters with %d-byte"
8140 " alignment has changed in GCC 4.6",
8141 align / BITS_PER_UNIT);
8145 return align;
8148 /* Return true if N is a possible register number of function value. */
8150 static bool
8151 ix86_function_value_regno_p (const unsigned int regno)
8153 switch (regno)
8155 case AX_REG:
8156 return true;
8157 case DX_REG:
8158 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8159 case DI_REG:
8160 case SI_REG:
8161 return TARGET_64BIT && ix86_abi != MS_ABI;
8163 case FIRST_BND_REG:
8164 return chkp_function_instrumented_p (current_function_decl);
8166 /* Complex values are returned in %st(0)/%st(1) pair. */
8167 case ST0_REG:
8168 case ST1_REG:
8169 /* TODO: The function should depend on current function ABI but
8170 builtins.c would need updating then. Therefore we use the
8171 default ABI. */
8172 if (TARGET_64BIT && ix86_abi == MS_ABI)
8173 return false;
8174 return TARGET_FLOAT_RETURNS_IN_80387;
8176 /* Complex values are returned in %xmm0/%xmm1 pair. */
8177 case XMM0_REG:
8178 case XMM1_REG:
8179 return TARGET_SSE;
8181 case MM0_REG:
8182 if (TARGET_MACHO || TARGET_64BIT)
8183 return false;
8184 return TARGET_MMX;
8187 return false;
8190 /* Define how to find the value returned by a function.
8191 VALTYPE is the data type of the value (as a tree).
8192 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8193 otherwise, FUNC is 0. */
8195 static rtx
8196 function_value_32 (machine_mode orig_mode, machine_mode mode,
8197 const_tree fntype, const_tree fn)
8199 unsigned int regno;
8201 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8202 we normally prevent this case when mmx is not available. However
8203 some ABIs may require the result to be returned like DImode. */
8204 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8205 regno = FIRST_MMX_REG;
8207 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8208 we prevent this case when sse is not available. However some ABIs
8209 may require the result to be returned like integer TImode. */
8210 else if (mode == TImode
8211 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8212 regno = FIRST_SSE_REG;
8214 /* 32-byte vector modes in %ymm0. */
8215 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8216 regno = FIRST_SSE_REG;
8218 /* 64-byte vector modes in %zmm0. */
8219 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8220 regno = FIRST_SSE_REG;
8222 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8223 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8224 regno = FIRST_FLOAT_REG;
8225 else
8226 /* Most things go in %eax. */
8227 regno = AX_REG;
8229 /* Override FP return register with %xmm0 for local functions when
8230 SSE math is enabled or for functions with sseregparm attribute. */
8231 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8233 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8234 if ((sse_level >= 1 && mode == SFmode)
8235 || (sse_level == 2 && mode == DFmode))
8236 regno = FIRST_SSE_REG;
8239 /* OImode shouldn't be used directly. */
8240 gcc_assert (mode != OImode);
8242 return gen_rtx_REG (orig_mode, regno);
8245 static rtx
8246 function_value_64 (machine_mode orig_mode, machine_mode mode,
8247 const_tree valtype)
8249 rtx ret;
8251 /* Handle libcalls, which don't provide a type node. */
8252 if (valtype == NULL)
8254 unsigned int regno;
8256 switch (mode)
8258 case SFmode:
8259 case SCmode:
8260 case DFmode:
8261 case DCmode:
8262 case TFmode:
8263 case SDmode:
8264 case DDmode:
8265 case TDmode:
8266 regno = FIRST_SSE_REG;
8267 break;
8268 case XFmode:
8269 case XCmode:
8270 regno = FIRST_FLOAT_REG;
8271 break;
8272 case TCmode:
8273 return NULL;
8274 default:
8275 regno = AX_REG;
8278 return gen_rtx_REG (mode, regno);
8280 else if (POINTER_TYPE_P (valtype))
8282 /* Pointers are always returned in word_mode. */
8283 mode = word_mode;
8286 ret = construct_container (mode, orig_mode, valtype, 1,
8287 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8288 x86_64_int_return_registers, 0);
8290 /* For zero sized structures, construct_container returns NULL, but we
8291 need to keep the rest of the compiler happy by returning a meaningful value. */
8292 if (!ret)
8293 ret = gen_rtx_REG (orig_mode, AX_REG);
8295 return ret;
8298 static rtx
8299 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8300 const_tree valtype)
8302 unsigned int regno = AX_REG;
8304 if (TARGET_SSE)
8306 switch (GET_MODE_SIZE (mode))
8308 case 16:
8309 if (valtype != NULL_TREE
8310 && !VECTOR_INTEGER_TYPE_P (valtype)
8312 && !INTEGRAL_TYPE_P (valtype)
8313 && !VECTOR_FLOAT_TYPE_P (valtype))
8314 break;
8315 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8316 && !COMPLEX_MODE_P (mode))
8317 regno = FIRST_SSE_REG;
8318 break;
8319 case 8:
8320 case 4:
8321 if (mode == SFmode || mode == DFmode)
8322 regno = FIRST_SSE_REG;
8323 break;
8324 default:
8325 break;
8328 return gen_rtx_REG (orig_mode, regno);
8331 static rtx
8332 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8333 machine_mode orig_mode, machine_mode mode)
8335 const_tree fn, fntype;
8337 fn = NULL_TREE;
8338 if (fntype_or_decl && DECL_P (fntype_or_decl))
8339 fn = fntype_or_decl;
8340 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8342 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8343 || POINTER_BOUNDS_MODE_P (mode))
8344 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8345 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8346 return function_value_ms_64 (orig_mode, mode, valtype);
8347 else if (TARGET_64BIT)
8348 return function_value_64 (orig_mode, mode, valtype);
8349 else
8350 return function_value_32 (orig_mode, mode, fntype, fn);
8353 static rtx
8354 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8356 machine_mode mode, orig_mode;
8358 orig_mode = TYPE_MODE (valtype);
8359 mode = type_natural_mode (valtype, NULL, true);
8360 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8363 /* Return an RTX representing a place where a function returns
8364 or receives pointer bounds, or NULL if no bounds are returned.
8366 VALTYPE is a data type of a value returned by the function.
8368 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8369 or FUNCTION_TYPE of the function.
8371 If OUTGOING is false, return a place in which the caller will
8372 see the return value. Otherwise, return a place where a
8373 function returns a value. */
8375 static rtx
8376 ix86_function_value_bounds (const_tree valtype,
8377 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8378 bool outgoing ATTRIBUTE_UNUSED)
8380 rtx res = NULL_RTX;
8382 if (BOUNDED_TYPE_P (valtype))
8383 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8384 else if (chkp_type_has_pointer (valtype))
8386 bitmap slots;
8387 rtx bounds[2];
8388 bitmap_iterator bi;
8389 unsigned i, bnd_no = 0;
8391 bitmap_obstack_initialize (NULL);
8392 slots = BITMAP_ALLOC (NULL);
8393 chkp_find_bound_slots (valtype, slots);
8395 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8397 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8398 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8399 gcc_assert (bnd_no < 2);
8400 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8403 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8405 BITMAP_FREE (slots);
8406 bitmap_obstack_release (NULL);
8408 else
8409 res = NULL_RTX;
8411 return res;
8414 /* Pointer function arguments and return values are promoted to
8415 word_mode. */
8417 static machine_mode
8418 ix86_promote_function_mode (const_tree type, machine_mode mode,
8419 int *punsignedp, const_tree fntype,
8420 int for_return)
8422 if (type != NULL_TREE && POINTER_TYPE_P (type))
8424 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8425 return word_mode;
8427 return default_promote_function_mode (type, mode, punsignedp, fntype,
8428 for_return);
8431 /* Return true if a structure, union or array with MODE containing FIELD
8432 should be accessed using BLKmode. */
8434 static bool
8435 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8437 /* Union with XFmode must be in BLKmode. */
8438 return (mode == XFmode
8439 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8440 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8444 ix86_libcall_value (machine_mode mode)
8446 return ix86_function_value_1 (NULL, NULL, mode, mode);
8449 /* Return true iff type is returned in memory. */
8451 static bool
8452 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8454 #ifdef SUBTARGET_RETURN_IN_MEMORY
8455 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8456 #else
8457 const machine_mode mode = type_natural_mode (type, NULL, true);
8458 HOST_WIDE_INT size;
8460 if (POINTER_BOUNDS_TYPE_P (type))
8461 return false;
8463 if (TARGET_64BIT)
8465 if (ix86_function_type_abi (fntype) == MS_ABI)
8467 size = int_size_in_bytes (type);
8469 /* __m128 is returned in xmm0. */
8470 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8471 || INTEGRAL_TYPE_P (type)
8472 || VECTOR_FLOAT_TYPE_P (type))
8473 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8474 && !COMPLEX_MODE_P (mode)
8475 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8476 return false;
8478 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8479 return size != 1 && size != 2 && size != 4 && size != 8;
8481 else
8483 int needed_intregs, needed_sseregs;
8485 return examine_argument (mode, type, 1,
8486 &needed_intregs, &needed_sseregs);
8489 else
8491 if (mode == BLKmode)
8492 return true;
8494 size = int_size_in_bytes (type);
8496 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8497 return false;
8499 if (VECTOR_MODE_P (mode) || mode == TImode)
8501 /* User-created vectors small enough to fit in EAX. */
8502 if (size < 8)
8503 return false;
8505 /* Unless the ABI prescribes otherwise,
8506 MMX/3dNow values are returned in MM0 if available. */
8508 if (size == 8)
8509 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8511 /* SSE values are returned in XMM0 if available. */
8512 if (size == 16)
8513 return !TARGET_SSE;
8515 /* AVX values are returned in YMM0 if available. */
8516 if (size == 32)
8517 return !TARGET_AVX;
8519 /* AVX512F values are returned in ZMM0 if available. */
8520 if (size == 64)
8521 return !TARGET_AVX512F;
8524 if (mode == XFmode)
8525 return false;
8527 if (size > 12)
8528 return true;
8530 /* OImode shouldn't be used directly. */
8531 gcc_assert (mode != OImode);
8533 return false;
8535 #endif
8539 /* Create the va_list data type. */
8541 /* Returns the calling convention specific va_list data type.
8542 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8544 static tree
8545 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8547 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8549 /* For i386 we use plain pointer to argument area. */
8550 if (!TARGET_64BIT || abi == MS_ABI)
8551 return build_pointer_type (char_type_node);
8553 record = lang_hooks.types.make_type (RECORD_TYPE);
8554 type_decl = build_decl (BUILTINS_LOCATION,
8555 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8557 f_gpr = build_decl (BUILTINS_LOCATION,
8558 FIELD_DECL, get_identifier ("gp_offset"),
8559 unsigned_type_node);
8560 f_fpr = build_decl (BUILTINS_LOCATION,
8561 FIELD_DECL, get_identifier ("fp_offset"),
8562 unsigned_type_node);
8563 f_ovf = build_decl (BUILTINS_LOCATION,
8564 FIELD_DECL, get_identifier ("overflow_arg_area"),
8565 ptr_type_node);
8566 f_sav = build_decl (BUILTINS_LOCATION,
8567 FIELD_DECL, get_identifier ("reg_save_area"),
8568 ptr_type_node);
8570 va_list_gpr_counter_field = f_gpr;
8571 va_list_fpr_counter_field = f_fpr;
8573 DECL_FIELD_CONTEXT (f_gpr) = record;
8574 DECL_FIELD_CONTEXT (f_fpr) = record;
8575 DECL_FIELD_CONTEXT (f_ovf) = record;
8576 DECL_FIELD_CONTEXT (f_sav) = record;
8578 TYPE_STUB_DECL (record) = type_decl;
8579 TYPE_NAME (record) = type_decl;
8580 TYPE_FIELDS (record) = f_gpr;
8581 DECL_CHAIN (f_gpr) = f_fpr;
8582 DECL_CHAIN (f_fpr) = f_ovf;
8583 DECL_CHAIN (f_ovf) = f_sav;
8585 layout_type (record);
8587 /* The correct type is an array type of one element. */
8588 return build_array_type (record, build_index_type (size_zero_node));
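/* For reference (mirrors the SysV AMD64 psABI definition, not new
   functionality): the record laid out above corresponds to

     typedef struct __va_list_tag {
       unsigned int gp_offset;        // offset into reg_save_area for GPRs
       unsigned int fp_offset;        // offset into reg_save_area for SSE regs
       void *overflow_arg_area;       // next stack-passed argument
       void *reg_save_area;           // register save area from the prologue
     } __builtin_va_list[1];          // array of one element, as returned here

   which is what user code sees as va_list on 64-bit SysV targets. */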
8591 /* Set up the builtin va_list data type and, for 64-bit, the additional
8592 calling convention specific va_list data types. */
8594 static tree
8595 ix86_build_builtin_va_list (void)
8597 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8599 /* Initialize abi specific va_list builtin types. */
8600 if (TARGET_64BIT)
8602 tree t;
8603 if (ix86_abi == MS_ABI)
8605 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8606 if (TREE_CODE (t) != RECORD_TYPE)
8607 t = build_variant_type_copy (t);
8608 sysv_va_list_type_node = t;
8610 else
8612 t = ret;
8613 if (TREE_CODE (t) != RECORD_TYPE)
8614 t = build_variant_type_copy (t);
8615 sysv_va_list_type_node = t;
8617 if (ix86_abi != MS_ABI)
8619 t = ix86_build_builtin_va_list_abi (MS_ABI);
8620 if (TREE_CODE (t) != RECORD_TYPE)
8621 t = build_variant_type_copy (t);
8622 ms_va_list_type_node = t;
8624 else
8626 t = ret;
8627 if (TREE_CODE (t) != RECORD_TYPE)
8628 t = build_variant_type_copy (t);
8629 ms_va_list_type_node = t;
8633 return ret;
8636 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8638 static void
8639 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8641 rtx save_area, mem;
8642 alias_set_type set;
8643 int i, max;
8645 /* GPR size of varargs save area. */
8646 if (cfun->va_list_gpr_size)
8647 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8648 else
8649 ix86_varargs_gpr_size = 0;
8651 /* FPR size of varargs save area. We don't need it if we don't pass
8652 anything in SSE registers. */
8653 if (TARGET_SSE && cfun->va_list_fpr_size)
8654 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8655 else
8656 ix86_varargs_fpr_size = 0;
8658 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8659 return;
8661 save_area = frame_pointer_rtx;
8662 set = get_varargs_alias_set ();
8664 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8665 if (max > X86_64_REGPARM_MAX)
8666 max = X86_64_REGPARM_MAX;
8668 for (i = cum->regno; i < max; i++)
8670 mem = gen_rtx_MEM (word_mode,
8671 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8672 MEM_NOTRAP_P (mem) = 1;
8673 set_mem_alias_set (mem, set);
8674 emit_move_insn (mem,
8675 gen_rtx_REG (word_mode,
8676 x86_64_int_parameter_registers[i]));
8679 if (ix86_varargs_fpr_size)
8681 machine_mode smode;
8682 rtx_code_label *label;
8683 rtx test;
8685 /* Now emit code to save SSE registers. The AX parameter contains number
8686 of SSE parameter registers used to call this function, though all we
8687 actually check here is the zero/non-zero status. */
8689 label = gen_label_rtx ();
8690 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8691 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8692 label));
8694 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8695 we used movdqa (i.e. TImode) instead? Perhaps even better would
8696 be if we could determine the real mode of the data, via a hook
8697 into pass_stdarg. Ignore all that for now. */
8698 smode = V4SFmode;
8699 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8700 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8702 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8703 if (max > X86_64_SSE_REGPARM_MAX)
8704 max = X86_64_SSE_REGPARM_MAX;
8706 for (i = cum->sse_regno; i < max; ++i)
8708 mem = plus_constant (Pmode, save_area,
8709 i * 16 + ix86_varargs_gpr_size);
8710 mem = gen_rtx_MEM (smode, mem);
8711 MEM_NOTRAP_P (mem) = 1;
8712 set_mem_alias_set (mem, set);
8713 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8715 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8718 emit_label (label);
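/* Illustrative summary (not part of the original source): the code above
   lays out the varargs register save area as up to six 8-byte GPR slots
   starting at the frame pointer, followed by up to eight 16-byte SSE
   slots at frame_pointer + ix86_varargs_gpr_size. The gp_offset and
   fp_offset fields set up in ix86_va_start later index into this area. */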
8722 static void
8723 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8725 alias_set_type set = get_varargs_alias_set ();
8726 int i;
8728 /* Reset to zero, as there might be a sysv va_arg used
8729 before. */
8730 ix86_varargs_gpr_size = 0;
8731 ix86_varargs_fpr_size = 0;
8733 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8735 rtx reg, mem;
8737 mem = gen_rtx_MEM (Pmode,
8738 plus_constant (Pmode, virtual_incoming_args_rtx,
8739 i * UNITS_PER_WORD));
8740 MEM_NOTRAP_P (mem) = 1;
8741 set_mem_alias_set (mem, set);
8743 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8744 emit_move_insn (mem, reg);
8748 static void
8749 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8750 tree type, int *, int no_rtl)
8752 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8753 CUMULATIVE_ARGS next_cum;
8754 tree fntype;
8756 /* This argument doesn't appear to be used anymore. Which is good,
8757 because the old code here didn't suppress rtl generation. */
8758 gcc_assert (!no_rtl);
8760 if (!TARGET_64BIT)
8761 return;
8763 fntype = TREE_TYPE (current_function_decl);
8765 /* For varargs, we do not want to skip the dummy va_dcl argument.
8766 For stdargs, we do want to skip the last named argument. */
8767 next_cum = *cum;
8768 if (stdarg_p (fntype))
8769 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8770 true);
8772 if (cum->call_abi == MS_ABI)
8773 setup_incoming_varargs_ms_64 (&next_cum);
8774 else
8775 setup_incoming_varargs_64 (&next_cum);
8778 static void
8779 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8780 enum machine_mode mode,
8781 tree type,
8782 int *pretend_size ATTRIBUTE_UNUSED,
8783 int no_rtl)
8785 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8786 CUMULATIVE_ARGS next_cum;
8787 tree fntype;
8788 rtx save_area;
8789 int bnd_reg, i, max;
8791 gcc_assert (!no_rtl);
8793 /* Do nothing if we use plain pointer to argument area. */
8794 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8795 return;
8797 fntype = TREE_TYPE (current_function_decl);
8799 /* For varargs, we do not want to skip the dummy va_dcl argument.
8800 For stdargs, we do want to skip the last named argument. */
8801 next_cum = *cum;
8802 if (stdarg_p (fntype))
8803 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8804 true);
8805 save_area = frame_pointer_rtx;
8807 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8808 if (max > X86_64_REGPARM_MAX)
8809 max = X86_64_REGPARM_MAX;
8811 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8812 if (chkp_function_instrumented_p (current_function_decl))
8813 for (i = cum->regno; i < max; i++)
8815 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8816 rtx reg = gen_rtx_REG (DImode,
8817 x86_64_int_parameter_registers[i]);
8818 rtx ptr = reg;
8819 rtx bounds;
8821 if (bnd_reg <= LAST_BND_REG)
8822 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8823 else
8825 rtx ldx_addr =
8826 plus_constant (Pmode, arg_pointer_rtx,
8827 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8828 bounds = gen_reg_rtx (BNDmode);
8829 emit_insn (BNDmode == BND64mode
8830 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8831 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8834 emit_insn (BNDmode == BND64mode
8835 ? gen_bnd64_stx (addr, ptr, bounds)
8836 : gen_bnd32_stx (addr, ptr, bounds));
8838 bnd_reg++;
8843 /* Check whether TYPE is a va_list of the plain char * kind. */
8845 static bool
8846 is_va_list_char_pointer (tree type)
8848 tree canonic;
8850 /* For 32-bit it is always true. */
8851 if (!TARGET_64BIT)
8852 return true;
8853 canonic = ix86_canonical_va_list_type (type);
8854 return (canonic == ms_va_list_type_node
8855 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8858 /* Implement va_start. */
8860 static void
8861 ix86_va_start (tree valist, rtx nextarg)
8863 HOST_WIDE_INT words, n_gpr, n_fpr;
8864 tree f_gpr, f_fpr, f_ovf, f_sav;
8865 tree gpr, fpr, ovf, sav, t;
8866 tree type;
8867 rtx ovf_rtx;
8869 if (flag_split_stack
8870 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8872 unsigned int scratch_regno;
8874 /* When we are splitting the stack, we can't refer to the stack
8875 arguments using internal_arg_pointer, because they may be on
8876 the old stack. The split stack prologue will arrange to
8877 leave a pointer to the old stack arguments in a scratch
8878 register, which we here copy to a pseudo-register. The split
8879 stack prologue can't set the pseudo-register directly because
8880 it (the prologue) runs before any registers have been saved. */
8882 scratch_regno = split_stack_prologue_scratch_regno ();
8883 if (scratch_regno != INVALID_REGNUM)
8885 rtx reg;
8886 rtx_insn *seq;
8888 reg = gen_reg_rtx (Pmode);
8889 cfun->machine->split_stack_varargs_pointer = reg;
8891 start_sequence ();
8892 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8893 seq = get_insns ();
8894 end_sequence ();
8896 push_topmost_sequence ();
8897 emit_insn_after (seq, entry_of_function ());
8898 pop_topmost_sequence ();
8902 /* Only 64bit target needs something special. */
8903 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8905 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8906 std_expand_builtin_va_start (valist, nextarg);
8907 else
8909 rtx va_r, next;
8911 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8912 next = expand_binop (ptr_mode, add_optab,
8913 cfun->machine->split_stack_varargs_pointer,
8914 crtl->args.arg_offset_rtx,
8915 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8916 convert_move (va_r, next, 0);
8918 /* Store zero bounds for va_list. */
8919 if (chkp_function_instrumented_p (current_function_decl))
8920 chkp_expand_bounds_reset_for_mem (valist,
8921 make_tree (TREE_TYPE (valist),
8922 next));
8925 return;
8928 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8929 f_fpr = DECL_CHAIN (f_gpr);
8930 f_ovf = DECL_CHAIN (f_fpr);
8931 f_sav = DECL_CHAIN (f_ovf);
8933 valist = build_simple_mem_ref (valist);
8934 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8935 /* The following should be folded into the MEM_REF offset. */
8936 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8937 f_gpr, NULL_TREE);
8938 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8939 f_fpr, NULL_TREE);
8940 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8941 f_ovf, NULL_TREE);
8942 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8943 f_sav, NULL_TREE);
8945 /* Count number of gp and fp argument registers used. */
8946 words = crtl->args.info.words;
8947 n_gpr = crtl->args.info.regno;
8948 n_fpr = crtl->args.info.sse_regno;
8950 if (cfun->va_list_gpr_size)
8952 type = TREE_TYPE (gpr);
8953 t = build2 (MODIFY_EXPR, type,
8954 gpr, build_int_cst (type, n_gpr * 8));
8955 TREE_SIDE_EFFECTS (t) = 1;
8956 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8959 if (TARGET_SSE && cfun->va_list_fpr_size)
8961 type = TREE_TYPE (fpr);
8962 t = build2 (MODIFY_EXPR, type, fpr,
8963 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8964 TREE_SIDE_EFFECTS (t) = 1;
8965 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8968 /* Find the overflow area. */
8969 type = TREE_TYPE (ovf);
8970 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8971 ovf_rtx = crtl->args.internal_arg_pointer;
8972 else
8973 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8974 t = make_tree (type, ovf_rtx);
8975 if (words != 0)
8976 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8978 /* Store zero bounds for overflow area pointer. */
8979 if (chkp_function_instrumented_p (current_function_decl))
8980 chkp_expand_bounds_reset_for_mem (ovf, t);
8982 t = build2 (MODIFY_EXPR, type, ovf, t);
8983 TREE_SIDE_EFFECTS (t) = 1;
8984 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8986 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8988 /* Find the register save area.
8989 The function prologue saves it right above the stack frame. */
8990 type = TREE_TYPE (sav);
8991 t = make_tree (type, frame_pointer_rtx);
8992 if (!ix86_varargs_gpr_size)
8993 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8995 /* Store zero bounds for save area pointer. */
8996 if (chkp_function_instrumented_p (current_function_decl))
8997 chkp_expand_bounds_reset_for_mem (sav, t);
8999 t = build2 (MODIFY_EXPR, type, sav, t);
9000 TREE_SIDE_EFFECTS (t) = 1;
9001 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
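/* Illustrative recap (not part of the original source): for a function
   such as int f (const char *fmt, ...) where one GPR holds FMT, the code
   above sets gp_offset to 8, fp_offset to 8 * X86_64_REGPARM_MAX (48)
   plus 16 per named SSE argument, points overflow_arg_area at the first
   stack-passed argument, and points reg_save_area at the block saved by
   setup_incoming_varargs_64. */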
9005 /* Implement va_arg. */
9007 static tree
9008 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9009 gimple_seq *post_p)
9011 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9012 tree f_gpr, f_fpr, f_ovf, f_sav;
9013 tree gpr, fpr, ovf, sav, t;
9014 int size, rsize;
9015 tree lab_false, lab_over = NULL_TREE;
9016 tree addr, t2;
9017 rtx container;
9018 int indirect_p = 0;
9019 tree ptrtype;
9020 machine_mode nat_mode;
9021 unsigned int arg_boundary;
9023 /* Only 64bit target needs something special. */
9024 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9025 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9027 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9028 f_fpr = DECL_CHAIN (f_gpr);
9029 f_ovf = DECL_CHAIN (f_fpr);
9030 f_sav = DECL_CHAIN (f_ovf);
9032 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9033 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9034 valist = build_va_arg_indirect_ref (valist);
9035 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9036 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9037 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9039 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9040 if (indirect_p)
9041 type = build_pointer_type (type);
9042 size = int_size_in_bytes (type);
9043 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
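/* Illustrative example (not in the original source): with UNITS_PER_WORD
   of 8, a 12-byte argument yields rsize == 2, i.e. it consumes two
   8-byte slots when it is fetched from the overflow area below. */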
9045 nat_mode = type_natural_mode (type, NULL, false);
9046 switch (nat_mode)
9048 case V8SFmode:
9049 case V8SImode:
9050 case V32QImode:
9051 case V16HImode:
9052 case V4DFmode:
9053 case V4DImode:
9054 case V16SFmode:
9055 case V16SImode:
9056 case V64QImode:
9057 case V32HImode:
9058 case V8DFmode:
9059 case V8DImode:
9060 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
9061 if (!TARGET_64BIT_MS_ABI)
9063 container = NULL;
9064 break;
9067 default:
9068 container = construct_container (nat_mode, TYPE_MODE (type),
9069 type, 0, X86_64_REGPARM_MAX,
9070 X86_64_SSE_REGPARM_MAX, intreg,
9072 break;
9075 /* Pull the value out of the saved registers. */
9077 addr = create_tmp_var (ptr_type_node, "addr");
9079 if (container)
9081 int needed_intregs, needed_sseregs;
9082 bool need_temp;
9083 tree int_addr, sse_addr;
9085 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9086 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9088 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9090 need_temp = (!REG_P (container)
9091 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9092 || TYPE_ALIGN (type) > 128));
9094 /* If we are passing a structure, verify that it occupies a consecutive
9095 block of the register save area. If not, we need to do moves. */
9096 if (!need_temp && !REG_P (container))
9098 /* Verify that all registers are strictly consecutive */
9099 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9101 int i;
9103 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9105 rtx slot = XVECEXP (container, 0, i);
9106 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9107 || INTVAL (XEXP (slot, 1)) != i * 16)
9108 need_temp = true;
9111 else
9113 int i;
9115 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9117 rtx slot = XVECEXP (container, 0, i);
9118 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9119 || INTVAL (XEXP (slot, 1)) != i * 8)
9120 need_temp = true;
9124 if (!need_temp)
9126 int_addr = addr;
9127 sse_addr = addr;
9129 else
9131 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9132 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9135 /* First ensure that we fit completely in registers. */
9136 if (needed_intregs)
9138 t = build_int_cst (TREE_TYPE (gpr),
9139 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9140 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9141 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9142 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9143 gimplify_and_add (t, pre_p);
9145 if (needed_sseregs)
9147 t = build_int_cst (TREE_TYPE (fpr),
9148 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9149 + X86_64_REGPARM_MAX * 8);
9150 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9151 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9152 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9153 gimplify_and_add (t, pre_p);
9156 /* Compute index to start of area used for integer regs. */
9157 if (needed_intregs)
9159 /* int_addr = gpr + sav; */
9160 t = fold_build_pointer_plus (sav, gpr);
9161 gimplify_assign (int_addr, t, pre_p);
9163 if (needed_sseregs)
9165 /* sse_addr = fpr + sav; */
9166 t = fold_build_pointer_plus (sav, fpr);
9167 gimplify_assign (sse_addr, t, pre_p);
9169 if (need_temp)
9171 int i, prev_size = 0;
9172 tree temp = create_tmp_var (type, "va_arg_tmp");
9174 /* addr = &temp; */
9175 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9176 gimplify_assign (addr, t, pre_p);
9178 for (i = 0; i < XVECLEN (container, 0); i++)
9180 rtx slot = XVECEXP (container, 0, i);
9181 rtx reg = XEXP (slot, 0);
9182 machine_mode mode = GET_MODE (reg);
9183 tree piece_type;
9184 tree addr_type;
9185 tree daddr_type;
9186 tree src_addr, src;
9187 int src_offset;
9188 tree dest_addr, dest;
9189 int cur_size = GET_MODE_SIZE (mode);
9191 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9192 prev_size = INTVAL (XEXP (slot, 1));
9193 if (prev_size + cur_size > size)
9195 cur_size = size - prev_size;
9196 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9197 if (mode == BLKmode)
9198 mode = QImode;
9200 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9201 if (mode == GET_MODE (reg))
9202 addr_type = build_pointer_type (piece_type);
9203 else
9204 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9205 true);
9206 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9207 true);
9209 if (SSE_REGNO_P (REGNO (reg)))
9211 src_addr = sse_addr;
9212 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9214 else
9216 src_addr = int_addr;
9217 src_offset = REGNO (reg) * 8;
9219 src_addr = fold_convert (addr_type, src_addr);
9220 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9222 dest_addr = fold_convert (daddr_type, addr);
9223 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9224 if (cur_size == GET_MODE_SIZE (mode))
9226 src = build_va_arg_indirect_ref (src_addr);
9227 dest = build_va_arg_indirect_ref (dest_addr);
9229 gimplify_assign (dest, src, pre_p);
9231 else
9233 tree copy
9234 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9235 3, dest_addr, src_addr,
9236 size_int (cur_size));
9237 gimplify_and_add (copy, pre_p);
9239 prev_size += cur_size;
9243 if (needed_intregs)
9245 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9246 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9247 gimplify_assign (gpr, t, pre_p);
9250 if (needed_sseregs)
9252 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9253 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9254 gimplify_assign (fpr, t, pre_p);
9257 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9259 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9262 /* ... otherwise out of the overflow area. */
9264 /* When the caller aligns a parameter on the stack, any parameter
9265 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9266 MAX_SUPPORTED_STACK_ALIGNMENT. Match the caller's behavior here
9267 in the callee. */
9268 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9269 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9270 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9272 /* Care for on-stack alignment if needed. */
9273 if (arg_boundary <= 64 || size == 0)
9274 t = ovf;
9275 else
9277 HOST_WIDE_INT align = arg_boundary / 8;
9278 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9279 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9280 build_int_cst (TREE_TYPE (t), -align));
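/* Illustrative example (not in the original source): for a 32-byte-aligned
   argument, align == 32, so an overflow pointer of 0x1008 becomes
   (0x1008 + 31) & -32 == 0x1020, the next 32-byte boundary. */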
9283 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9284 gimplify_assign (addr, t, pre_p);
9286 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9287 gimplify_assign (unshare_expr (ovf), t, pre_p);
9289 if (container)
9290 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9292 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9293 addr = fold_convert (ptrtype, addr);
9295 if (indirect_p)
9296 addr = build_va_arg_indirect_ref (addr);
9297 return build_va_arg_indirect_ref (addr);
9300 /* Return true if OPNUM's MEM should be matched
9301 in movabs* patterns. */
9303 bool
9304 ix86_check_movabs (rtx insn, int opnum)
9306 rtx set, mem;
9308 set = PATTERN (insn);
9309 if (GET_CODE (set) == PARALLEL)
9310 set = XVECEXP (set, 0, 0);
9311 gcc_assert (GET_CODE (set) == SET);
9312 mem = XEXP (set, opnum);
9313 while (GET_CODE (mem) == SUBREG)
9314 mem = SUBREG_REG (mem);
9315 gcc_assert (MEM_P (mem));
9316 return volatile_ok || !MEM_VOLATILE_P (mem);
9319 /* Initialize the table of extra 80387 mathematical constants. */
9321 static void
9322 init_ext_80387_constants (void)
9324 static const char * cst[5] =
9326 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9327 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9328 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9329 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9330 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9332 int i;
9334 for (i = 0; i < 5; i++)
9336 real_from_string (&ext_80387_constants_table[i], cst[i]);
9337 /* Ensure each constant is rounded to XFmode precision. */
9338 real_convert (&ext_80387_constants_table[i],
9339 XFmode, &ext_80387_constants_table[i]);
9342 ext_80387_constants_init = 1;
9345 /* Return non-zero if the constant is something that
9346 can be loaded with a special instruction. */
9349 standard_80387_constant_p (rtx x)
9351 machine_mode mode = GET_MODE (x);
9353 REAL_VALUE_TYPE r;
9355 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9356 return -1;
9358 if (x == CONST0_RTX (mode))
9359 return 1;
9360 if (x == CONST1_RTX (mode))
9361 return 2;
9363 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9365 /* For XFmode constants, try to find a special 80387 instruction when
9366 optimizing for size or on those CPUs that benefit from them. */
9367 if (mode == XFmode
9368 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9370 int i;
9372 if (! ext_80387_constants_init)
9373 init_ext_80387_constants ();
9375 for (i = 0; i < 5; i++)
9376 if (real_identical (&r, &ext_80387_constants_table[i]))
9377 return i + 3;
9380 /* Load of the constant -0.0 or -1.0 will be split as
9381 fldz;fchs or fld1;fchs sequence. */
9382 if (real_isnegzero (&r))
9383 return 8;
9384 if (real_identical (&r, &dconstm1))
9385 return 9;
9387 return 0;
9390 /* Return the opcode of the special instruction to be used to load
9391 the constant X. */
9393 const char *
9394 standard_80387_constant_opcode (rtx x)
9396 switch (standard_80387_constant_p (x))
9398 case 1:
9399 return "fldz";
9400 case 2:
9401 return "fld1";
9402 case 3:
9403 return "fldlg2";
9404 case 4:
9405 return "fldln2";
9406 case 5:
9407 return "fldl2e";
9408 case 6:
9409 return "fldl2t";
9410 case 7:
9411 return "fldpi";
9412 case 8:
9413 case 9:
9414 return "#";
9415 default:
9416 gcc_unreachable ();
9420 /* Return the CONST_DOUBLE representing the 80387 constant that is
9421 loaded by the specified special instruction. The argument IDX
9422 matches the return value from standard_80387_constant_p. */
9425 standard_80387_constant_rtx (int idx)
9427 int i;
9429 if (! ext_80387_constants_init)
9430 init_ext_80387_constants ();
9432 switch (idx)
9434 case 3:
9435 case 4:
9436 case 5:
9437 case 6:
9438 case 7:
9439 i = idx - 3;
9440 break;
9442 default:
9443 gcc_unreachable ();
9446 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9447 XFmode);
9450 /* Return 1 if X is all 0s and 2 if X is all 1s
9451 in a supported SSE/AVX vector mode. */
9454 standard_sse_constant_p (rtx x)
9456 machine_mode mode = GET_MODE (x);
9458 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9459 return 1;
9460 if (vector_all_ones_operand (x, mode))
9461 switch (mode)
9463 case V16QImode:
9464 case V8HImode:
9465 case V4SImode:
9466 case V2DImode:
9467 if (TARGET_SSE2)
9468 return 2;
9469 case V32QImode:
9470 case V16HImode:
9471 case V8SImode:
9472 case V4DImode:
9473 if (TARGET_AVX2)
9474 return 2;
9475 case V64QImode:
9476 case V32HImode:
9477 case V16SImode:
9478 case V8DImode:
9479 if (TARGET_AVX512F)
9480 return 2;
9481 default:
9482 break;
9485 return 0;
9488 /* Return the opcode of the special instruction to be used to load
9489 the constant X. */
9491 const char *
9492 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9494 switch (standard_sse_constant_p (x))
9496 case 1:
9497 switch (get_attr_mode (insn))
9499 case MODE_XI:
9500 return "vpxord\t%g0, %g0, %g0";
9501 case MODE_V16SF:
9502 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9503 : "vpxord\t%g0, %g0, %g0";
9504 case MODE_V8DF:
9505 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9506 : "vpxorq\t%g0, %g0, %g0";
9507 case MODE_TI:
9508 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9509 : "%vpxor\t%0, %d0";
9510 case MODE_V2DF:
9511 return "%vxorpd\t%0, %d0";
9512 case MODE_V4SF:
9513 return "%vxorps\t%0, %d0";
9515 case MODE_OI:
9516 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9517 : "vpxor\t%x0, %x0, %x0";
9518 case MODE_V4DF:
9519 return "vxorpd\t%x0, %x0, %x0";
9520 case MODE_V8SF:
9521 return "vxorps\t%x0, %x0, %x0";
9523 default:
9524 break;
9527 case 2:
9528 if (TARGET_AVX512VL
9529 || get_attr_mode (insn) == MODE_XI
9530 || get_attr_mode (insn) == MODE_V8DF
9531 || get_attr_mode (insn) == MODE_V16SF)
9532 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9533 if (TARGET_AVX)
9534 return "vpcmpeqd\t%0, %0, %0";
9535 else
9536 return "pcmpeqd\t%0, %0";
9538 default:
9539 break;
9541 gcc_unreachable ();
9544 /* Returns true if OP contains a symbol reference */
9546 bool
9547 symbolic_reference_mentioned_p (rtx op)
9549 const char *fmt;
9550 int i;
9552 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9553 return true;
9555 fmt = GET_RTX_FORMAT (GET_CODE (op));
9556 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9558 if (fmt[i] == 'E')
9560 int j;
9562 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9563 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9564 return true;
9567 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9568 return true;
9571 return false;
9574 /* Return true if it is appropriate to emit `ret' instructions in the
9575 body of a function. Do this only if the epilogue is simple, needing a
9576 couple of insns. Prior to reloading, we can't tell how many registers
9577 must be saved, so return false then. Return false if there is no frame
9578 marker to de-allocate. */
9580 bool
9581 ix86_can_use_return_insn_p (void)
9583 struct ix86_frame frame;
9585 if (! reload_completed || frame_pointer_needed)
9586 return 0;
9588 /* Don't allow more than 32k pop, since that's all we can do
9589 with one instruction. */
9590 if (crtl->args.pops_args && crtl->args.size >= 32768)
9591 return 0;
9593 ix86_compute_frame_layout (&frame);
9594 return (frame.stack_pointer_offset == UNITS_PER_WORD
9595 && (frame.nregs + frame.nsseregs) == 0);
9598 /* Value should be nonzero if functions must have frame pointers.
9599 Zero means the frame pointer need not be set up (and parms may
9600 be accessed via the stack pointer) in functions that seem suitable. */
9602 static bool
9603 ix86_frame_pointer_required (void)
9605 /* If we accessed previous frames, then the generated code expects
9606 to be able to access the saved ebp value in our frame. */
9607 if (cfun->machine->accesses_prev_frame)
9608 return true;
9610 /* Several x86 OSes need a frame pointer for other reasons,
9611 usually pertaining to setjmp. */
9612 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9613 return true;
9615 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9616 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9617 return true;
9619 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9620 stack allocation is 4GB. */
9621 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9622 return true;
9624 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9625 turns off the frame pointer by default. Turn it back on now if
9626 we've not got a leaf function. */
9627 if (TARGET_OMIT_LEAF_FRAME_POINTER
9628 && (!crtl->is_leaf
9629 || ix86_current_function_calls_tls_descriptor))
9630 return true;
9632 if (crtl->profile && !flag_fentry)
9633 return true;
9635 return false;
9638 /* Record that the current function accesses previous call frames. */
9640 void
9641 ix86_setup_frame_addresses (void)
9643 cfun->machine->accesses_prev_frame = 1;
9646 #ifndef USE_HIDDEN_LINKONCE
9647 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9648 # define USE_HIDDEN_LINKONCE 1
9649 # else
9650 # define USE_HIDDEN_LINKONCE 0
9651 # endif
9652 #endif
9654 static int pic_labels_used;
9656 /* Fills in the label name that should be used for a pc thunk for
9657 the given register. */
9659 static void
9660 get_pc_thunk_name (char name[32], unsigned int regno)
9662 gcc_assert (!TARGET_64BIT);
9664 if (USE_HIDDEN_LINKONCE)
9665 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9666 else
9667 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9671 /* This function generates code for -fpic that loads %ebx with
9672 the return address of the caller and then returns. */
9674 static void
9675 ix86_code_end (void)
9677 rtx xops[2];
9678 int regno;
9680 for (regno = AX_REG; regno <= SP_REG; regno++)
9682 char name[32];
9683 tree decl;
9685 if (!(pic_labels_used & (1 << regno)))
9686 continue;
9688 get_pc_thunk_name (name, regno);
9690 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9691 get_identifier (name),
9692 build_function_type_list (void_type_node, NULL_TREE));
9693 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9694 NULL_TREE, void_type_node);
9695 TREE_PUBLIC (decl) = 1;
9696 TREE_STATIC (decl) = 1;
9697 DECL_IGNORED_P (decl) = 1;
9699 #if TARGET_MACHO
9700 if (TARGET_MACHO)
9702 switch_to_section (darwin_sections[text_coal_section]);
9703 fputs ("\t.weak_definition\t", asm_out_file);
9704 assemble_name (asm_out_file, name);
9705 fputs ("\n\t.private_extern\t", asm_out_file);
9706 assemble_name (asm_out_file, name);
9707 putc ('\n', asm_out_file);
9708 ASM_OUTPUT_LABEL (asm_out_file, name);
9709 DECL_WEAK (decl) = 1;
9711 else
9712 #endif
9713 if (USE_HIDDEN_LINKONCE)
9715 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9717 targetm.asm_out.unique_section (decl, 0);
9718 switch_to_section (get_named_section (decl, NULL, 0));
9720 targetm.asm_out.globalize_label (asm_out_file, name);
9721 fputs ("\t.hidden\t", asm_out_file);
9722 assemble_name (asm_out_file, name);
9723 putc ('\n', asm_out_file);
9724 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9726 else
9728 switch_to_section (text_section);
9729 ASM_OUTPUT_LABEL (asm_out_file, name);
9732 DECL_INITIAL (decl) = make_node (BLOCK);
9733 current_function_decl = decl;
9734 init_function_start (decl);
9735 first_function_block_is_cold = false;
9736 /* Make sure unwind info is emitted for the thunk if needed. */
9737 final_start_function (emit_barrier (), asm_out_file, 1);
9739 /* Pad stack IP move with 4 instructions (two NOPs count
9740 as one instruction). */
9741 if (TARGET_PAD_SHORT_FUNCTION)
9743 int i = 8;
9745 while (i--)
9746 fputs ("\tnop\n", asm_out_file);
9749 xops[0] = gen_rtx_REG (Pmode, regno);
9750 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9751 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9752 output_asm_insn ("%!ret", NULL);
9753 final_end_function ();
9754 init_insn_lengths ();
9755 free_after_compilation (cfun);
9756 set_cfun (NULL);
9757 current_function_decl = NULL;
9760 if (flag_split_stack)
9761 file_end_indicate_split_stack ();
9764 /* Emit code for the SET_GOT patterns. */
9766 const char *
9767 output_set_got (rtx dest, rtx label)
9769 rtx xops[3];
9771 xops[0] = dest;
9773 if (TARGET_VXWORKS_RTP && flag_pic)
9775 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9776 xops[2] = gen_rtx_MEM (Pmode,
9777 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9778 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9780 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9781 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9782 an unadorned address. */
9783 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9784 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9785 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9786 return "";
9789 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9791 if (!flag_pic)
9793 if (TARGET_MACHO)
9794 /* We don't need a pic base, we're not producing pic. */
9795 gcc_unreachable ();
9797 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9798 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9799 targetm.asm_out.internal_label (asm_out_file, "L",
9800 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9802 else
9804 char name[32];
9805 get_pc_thunk_name (name, REGNO (dest));
9806 pic_labels_used |= 1 << REGNO (dest);
9808 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9809 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9810 output_asm_insn ("%!call\t%X2", xops);
9812 #if TARGET_MACHO
9813 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9814 This is what will be referenced by the Mach-O PIC subsystem. */
9815 if (machopic_should_output_picbase_label () || !label)
9816 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9818 /* When we are restoring the pic base at the site of a nonlocal label,
9819 and we decided to emit the pic base above, we will still output a
9820 local label used for calculating the correction offset (even though
9821 the offset will be 0 in that case). */
9822 if (label)
9823 targetm.asm_out.internal_label (asm_out_file, "L",
9824 CODE_LABEL_NUMBER (label));
9825 #endif
9828 if (!TARGET_MACHO)
9829 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9831 return "";
9834 /* Generate a "push" pattern for input ARG. */
9836 static rtx
9837 gen_push (rtx arg)
9839 struct machine_function *m = cfun->machine;
9841 if (m->fs.cfa_reg == stack_pointer_rtx)
9842 m->fs.cfa_offset += UNITS_PER_WORD;
9843 m->fs.sp_offset += UNITS_PER_WORD;
9845 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9846 arg = gen_rtx_REG (word_mode, REGNO (arg));
9848 return gen_rtx_SET (VOIDmode,
9849 gen_rtx_MEM (word_mode,
9850 gen_rtx_PRE_DEC (Pmode,
9851 stack_pointer_rtx)),
9852 arg);
9855 /* Generate a "pop" pattern for input ARG. */
9857 static rtx
9858 gen_pop (rtx arg)
9860 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9861 arg = gen_rtx_REG (word_mode, REGNO (arg));
9863 return gen_rtx_SET (VOIDmode,
9864 arg,
9865 gen_rtx_MEM (word_mode,
9866 gen_rtx_POST_INC (Pmode,
9867 stack_pointer_rtx)));
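/* Illustrative note (not part of the original source): on 64-bit targets
   the two helpers above build RTL of the form

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))    ;; gen_push
     (set (reg:DI <arg>) (mem:DI (post_inc:DI (reg:DI sp))))   ;; gen_pop

   which the backend's push/pop insn patterns then match. */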
9870 /* Return >= 0 if there is an unused call-clobbered register available
9871 for the entire function. */
9873 static unsigned int
9874 ix86_select_alt_pic_regnum (void)
9876 if (ix86_use_pseudo_pic_reg ())
9877 return INVALID_REGNUM;
9879 if (crtl->is_leaf
9880 && !crtl->profile
9881 && !ix86_current_function_calls_tls_descriptor)
9883 int i, drap;
9884 /* Can't use the same register for both PIC and DRAP. */
9885 if (crtl->drap_reg)
9886 drap = REGNO (crtl->drap_reg);
9887 else
9888 drap = -1;
9889 for (i = 2; i >= 0; --i)
9890 if (i != drap && !df_regs_ever_live_p (i))
9891 return i;
9894 return INVALID_REGNUM;
9897 /* Return TRUE if we need to save REGNO. */
9899 static bool
9900 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9902 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9903 && pic_offset_table_rtx)
9905 if (ix86_use_pseudo_pic_reg ())
9907 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9908 _mcount in prologue. */
9909 if (!TARGET_64BIT && flag_pic && crtl->profile)
9910 return true;
9912 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9913 || crtl->profile
9914 || crtl->calls_eh_return
9915 || crtl->uses_const_pool
9916 || cfun->has_nonlocal_label)
9917 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9920 if (crtl->calls_eh_return && maybe_eh_return)
9922 unsigned i;
9923 for (i = 0; ; i++)
9925 unsigned test = EH_RETURN_DATA_REGNO (i);
9926 if (test == INVALID_REGNUM)
9927 break;
9928 if (test == regno)
9929 return true;
9933 if (crtl->drap_reg
9934 && regno == REGNO (crtl->drap_reg)
9935 && !cfun->machine->no_drap_save_restore)
9936 return true;
9938 return (df_regs_ever_live_p (regno)
9939 && !call_used_regs[regno]
9940 && !fixed_regs[regno]
9941 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9944 /* Return number of saved general purpose registers. */
9946 static int
9947 ix86_nsaved_regs (void)
9949 int nregs = 0;
9950 int regno;
9952 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9953 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9954 nregs ++;
9955 return nregs;
9958 /* Return number of saved SSE registers. */
9960 static int
9961 ix86_nsaved_sseregs (void)
9963 int nregs = 0;
9964 int regno;
9966 if (!TARGET_64BIT_MS_ABI)
9967 return 0;
9968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9970 nregs ++;
9971 return nregs;
9974 /* Given FROM and TO register numbers, say whether this elimination is
9975 allowed. If stack alignment is needed, we can only replace argument
9976 pointer with hard frame pointer, or replace frame pointer with stack
9977 pointer. Otherwise, frame pointer elimination is automatically
9978 handled and all other eliminations are valid. */
9980 static bool
9981 ix86_can_eliminate (const int from, const int to)
9983 if (stack_realign_fp)
9984 return ((from == ARG_POINTER_REGNUM
9985 && to == HARD_FRAME_POINTER_REGNUM)
9986 || (from == FRAME_POINTER_REGNUM
9987 && to == STACK_POINTER_REGNUM));
9988 else
9989 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9992 /* Return the offset between two registers, one to be eliminated, and the other
9993 its replacement, at the start of a routine. */
9995 HOST_WIDE_INT
9996 ix86_initial_elimination_offset (int from, int to)
9998 struct ix86_frame frame;
9999 ix86_compute_frame_layout (&frame);
10001 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10002 return frame.hard_frame_pointer_offset;
10003 else if (from == FRAME_POINTER_REGNUM
10004 && to == HARD_FRAME_POINTER_REGNUM)
10005 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10006 else
10008 gcc_assert (to == STACK_POINTER_REGNUM);
10010 if (from == ARG_POINTER_REGNUM)
10011 return frame.stack_pointer_offset;
10013 gcc_assert (from == FRAME_POINTER_REGNUM);
10014 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10018 /* In a dynamically-aligned function, we can't know the offset from
10019 stack pointer to frame pointer, so we must ensure that setjmp
10020 eliminates fp against the hard fp (%ebp) rather than trying to
10021 index from %esp up to the top of the frame across a gap that is
10022 of unknown (at compile-time) size. */
10023 static rtx
10024 ix86_builtin_setjmp_frame_value (void)
10026 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10029 /* When using -fsplit-stack, the allocation routines set a field in
10030 the TCB to the bottom of the stack plus this much space, measured
10031 in bytes. */
10033 #define SPLIT_STACK_AVAILABLE 256
10035 /* Fill structure ix86_frame about frame of currently computed function. */
10037 static void
10038 ix86_compute_frame_layout (struct ix86_frame *frame)
10040 unsigned HOST_WIDE_INT stack_alignment_needed;
10041 HOST_WIDE_INT offset;
10042 unsigned HOST_WIDE_INT preferred_alignment;
10043 HOST_WIDE_INT size = get_frame_size ();
10044 HOST_WIDE_INT to_allocate;
10046 frame->nregs = ix86_nsaved_regs ();
10047 frame->nsseregs = ix86_nsaved_sseregs ();
10049 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10050 for function prologues and leaf functions. */
10051 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10052 && (!crtl->is_leaf || cfun->calls_alloca != 0
10053 || ix86_current_function_calls_tls_descriptor))
10055 crtl->preferred_stack_boundary = 128;
10056 crtl->stack_alignment_needed = 128;
10058 /* preferred_stack_boundary is never updated for calls
10059 expanded from a tls descriptor. Update it here. We don't update it in
10060 the expand stage because, according to the comments before
10061 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10062 away. */
10063 else if (ix86_current_function_calls_tls_descriptor
10064 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10066 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10067 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10068 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10071 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10072 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10074 gcc_assert (!size || stack_alignment_needed);
10075 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10076 gcc_assert (preferred_alignment <= stack_alignment_needed);
10078 /* For SEH we have to limit the amount of code movement into the prologue.
10079 At present we do this via a BLOCKAGE, at which point there's very little
10080 scheduling that can be done, which means that there's very little point
10081 in doing anything except PUSHs. */
10082 if (TARGET_SEH)
10083 cfun->machine->use_fast_prologue_epilogue = false;
10085 /* During a reload iteration the number of registers saved can change.
10086 Recompute the value as needed. Do not recompute when the number of
10087 registers didn't change, as reload makes multiple calls to this function
10088 and does not expect the decision to change within a single iteration. */
10089 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10090 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10092 int count = frame->nregs;
10093 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10095 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10097 /* The fast prologue uses move instead of push to save registers. This
10098 is significantly longer, but also executes faster as modern hardware
10099 can execute the moves in parallel, but can't do that for push/pop.
10101 Be careful about choosing which prologue to emit: when the function takes
10102 many instructions to execute, we may use the slow version, as well as
10103 when the function is known to be outside a hot spot (this is known with
10104 feedback only). Weight the size of the function by the number of registers
10105 to save, as it is cheap to use one or two push instructions but very
10106 slow to use many of them. */
10107 if (count)
10108 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10109 if (node->frequency < NODE_FREQUENCY_NORMAL
10110 || (flag_branch_probabilities
10111 && node->frequency < NODE_FREQUENCY_HOT))
10112 cfun->machine->use_fast_prologue_epilogue = false;
10113 else
10114 cfun->machine->use_fast_prologue_epilogue
10115 = !expensive_function_p (count);
10118 frame->save_regs_using_mov
10119 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10120 /* If static stack checking is enabled and done with probes,
10121 the registers need to be saved before allocating the frame. */
10122 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
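/* Rough sketch (illustrative, derived from the code that follows) of the
   frame regions laid out below, from the return address downward:
     return address, optional pushed static chain, optional saved frame
     pointer, GPR save area (reg_save_offset), 16-byte-aligned SSE save
     area (sse_reg_save_offset), va-arg register save area, local
     variables (frame_pointer_offset), outgoing arguments, ending at
     stack_pointer_offset. */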
10124 /* Skip return address. */
10125 offset = UNITS_PER_WORD;
10127 /* Skip pushed static chain. */
10128 if (ix86_static_chain_on_stack)
10129 offset += UNITS_PER_WORD;
10131 /* Skip saved base pointer. */
10132 if (frame_pointer_needed)
10133 offset += UNITS_PER_WORD;
10134 frame->hfp_save_offset = offset;
10136 /* The traditional frame pointer location is at the top of the frame. */
10137 frame->hard_frame_pointer_offset = offset;
10139 /* Register save area */
10140 offset += frame->nregs * UNITS_PER_WORD;
10141 frame->reg_save_offset = offset;
10143 /* On SEH target, registers are pushed just before the frame pointer
10144 location. */
10145 if (TARGET_SEH)
10146 frame->hard_frame_pointer_offset = offset;
10148 /* Align and set SSE register save area. */
10149 if (frame->nsseregs)
10151 /* The only ABI that has saved SSE registers (Win64) also has a
10152 16-byte aligned default stack, and thus we don't need to be
10153 within the re-aligned local stack frame to save them. */
10154 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10155 offset = (offset + 16 - 1) & -16;
10156 offset += frame->nsseregs * 16;
10158 frame->sse_reg_save_offset = offset;
10160 /* The re-aligned stack starts here. Values before this point are not
10161 directly comparable with values below this point. In order to make
10162 sure that no value happens to be the same before and after, force
10163 the alignment computation below to add a non-zero value. */
10164 if (stack_realign_fp)
10165 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10167 /* Va-arg area */
10168 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10169 offset += frame->va_arg_size;
10171 /* Align start of frame for local function. */
10172 if (stack_realign_fp
10173 || offset != frame->sse_reg_save_offset
10174 || size != 0
10175 || !crtl->is_leaf
10176 || cfun->calls_alloca
10177 || ix86_current_function_calls_tls_descriptor)
10178 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10180 /* Frame pointer points here. */
10181 frame->frame_pointer_offset = offset;
10183 offset += size;
10185 /* Add outgoing arguments area. Can be skipped if we eliminated
10186 all the function calls as dead code.
10187 Skipping is however impossible when the function calls alloca. The alloca
10188 expander assumes that the last crtl->outgoing_args_size bytes
10189 of the stack frame are unused. */
10190 if (ACCUMULATE_OUTGOING_ARGS
10191 && (!crtl->is_leaf || cfun->calls_alloca
10192 || ix86_current_function_calls_tls_descriptor))
10194 offset += crtl->outgoing_args_size;
10195 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10197 else
10198 frame->outgoing_arguments_size = 0;
10200 /* Align stack boundary. Only needed if we're calling another function
10201 or using alloca. */
10202 if (!crtl->is_leaf || cfun->calls_alloca
10203 || ix86_current_function_calls_tls_descriptor)
10204 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10206 /* We've reached end of stack frame. */
10207 frame->stack_pointer_offset = offset;
10209 /* Size prologue needs to allocate. */
10210 to_allocate = offset - frame->sse_reg_save_offset;
10212 if ((!to_allocate && frame->nregs <= 1)
10213 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10214 frame->save_regs_using_mov = false;
10216 if (ix86_using_red_zone ()
10217 && crtl->sp_is_unchanging
10218 && crtl->is_leaf
10219 && !ix86_current_function_calls_tls_descriptor)
10221 frame->red_zone_size = to_allocate;
10222 if (frame->save_regs_using_mov)
10223 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10224 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10225 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10227 else
10228 frame->red_zone_size = 0;
10229 frame->stack_pointer_offset -= frame->red_zone_size;
10231 /* The SEH frame pointer location is near the bottom of the frame.
10232 This is enforced by the fact that the difference between the
10233 stack pointer and the frame pointer is limited to 240 bytes in
10234 the unwind data structure. */
10235 if (TARGET_SEH)
10237 HOST_WIDE_INT diff;
10239 /* If we can leave the frame pointer where it is, do so. Also, returns
10240 the establisher frame for __builtin_frame_address (0). */
10241 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10242 if (diff <= SEH_MAX_FRAME_SIZE
10243 && (diff > 240 || (diff & 15) != 0)
10244 && !crtl->accesses_prior_frames)
10246 /* Ideally we'd determine what portion of the local stack frame
10247 (within the constraint of the lowest 240) is most heavily used.
10248 But without that complication, simply bias the frame pointer
10249 by 128 bytes so as to maximize the amount of the local stack
10250 frame that is addressable with 8-bit offsets. */
10251 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10256 /* This is semi-inlined memory_address_length, but simplified
10257 since we know that we're always dealing with reg+offset, and
10258 to avoid having to create and discard all that rtl. */
10260 static inline int
10261 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10263 int len = 4;
10265 if (offset == 0)
10267 /* EBP and R13 cannot be encoded without an offset. */
10268 len = (regno == BP_REG || regno == R13_REG);
10270 else if (IN_RANGE (offset, -128, 127))
10271 len = 1;
10273 /* ESP and R12 must be encoded with a SIB byte. */
10274 if (regno == SP_REG || regno == R12_REG)
10275 len++;
10277 return len;
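/* Illustrative examples (not part of the original source): (%rax, 0)
   needs no displacement, so len == 0; (%rbp, 0) still needs a disp8,
   len == 1; (%rsp, 0) needs a SIB byte, len == 1; and (%rsp, 8) needs
   both a disp8 and a SIB byte, len == 2. */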
10280 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10281 The valid base registers are taken from CFUN->MACHINE->FS. */
10283 static rtx
10284 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10286 const struct machine_function *m = cfun->machine;
10287 rtx base_reg = NULL;
10288 HOST_WIDE_INT base_offset = 0;
10290 if (m->use_fast_prologue_epilogue)
10292 /* Choose the base register most likely to allow the most scheduling
10293 opportunities. Generally FP is valid throughout the function,
10294 while DRAP must be reloaded within the epilogue. But choose either
10295 over the SP due to increased encoding size. */
10297 if (m->fs.fp_valid)
10299 base_reg = hard_frame_pointer_rtx;
10300 base_offset = m->fs.fp_offset - cfa_offset;
10302 else if (m->fs.drap_valid)
10304 base_reg = crtl->drap_reg;
10305 base_offset = 0 - cfa_offset;
10307 else if (m->fs.sp_valid)
10309 base_reg = stack_pointer_rtx;
10310 base_offset = m->fs.sp_offset - cfa_offset;
10313 else
10315 HOST_WIDE_INT toffset;
10316 int len = 16, tlen;
10318 /* Choose the base register with the smallest address encoding.
10319 With a tie, choose FP > DRAP > SP. */
10320 if (m->fs.sp_valid)
10322 base_reg = stack_pointer_rtx;
10323 base_offset = m->fs.sp_offset - cfa_offset;
10324 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10326 if (m->fs.drap_valid)
10328 toffset = 0 - cfa_offset;
10329 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10330 if (tlen <= len)
10332 base_reg = crtl->drap_reg;
10333 base_offset = toffset;
10334 len = tlen;
10337 if (m->fs.fp_valid)
10339 toffset = m->fs.fp_offset - cfa_offset;
10340 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10341 if (tlen <= len)
10343 base_reg = hard_frame_pointer_rtx;
10344 base_offset = toffset;
10345 len = tlen;
10349 gcc_assert (base_reg != NULL);
10351 return plus_constant (Pmode, base_reg, base_offset);
10354 /* Emit code to save registers in the prologue. */
10356 static void
10357 ix86_emit_save_regs (void)
10359 unsigned int regno;
10360 rtx insn;
10362 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10363 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10365 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10366 RTX_FRAME_RELATED_P (insn) = 1;
10370 /* Emit a single register save at CFA - CFA_OFFSET. */
10372 static void
10373 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10374 HOST_WIDE_INT cfa_offset)
10376 struct machine_function *m = cfun->machine;
10377 rtx reg = gen_rtx_REG (mode, regno);
10378 rtx mem, addr, base, insn;
10380 addr = choose_baseaddr (cfa_offset);
10381 mem = gen_frame_mem (mode, addr);
10383 /* For SSE saves, we need to indicate the 128-bit alignment. */
10384 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10386 insn = emit_move_insn (mem, reg);
10387 RTX_FRAME_RELATED_P (insn) = 1;
10389 base = addr;
10390 if (GET_CODE (base) == PLUS)
10391 base = XEXP (base, 0);
10392 gcc_checking_assert (REG_P (base));
10394 /* When saving registers into a re-aligned local stack frame, avoid
10395 any tricky guessing by dwarf2out. */
10396 if (m->fs.realigned)
10398 gcc_checking_assert (stack_realign_drap);
10400 if (regno == REGNO (crtl->drap_reg))
10402 /* A bit of a hack. We force the DRAP register to be saved in
10403 the re-aligned stack frame, which provides us with a copy
10404 of the CFA that will last past the prologue. Install it. */
10405 gcc_checking_assert (cfun->machine->fs.fp_valid);
10406 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10407 cfun->machine->fs.fp_offset - cfa_offset);
10408 mem = gen_rtx_MEM (mode, addr);
10409 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10411 else
10413 /* The frame pointer is a stable reference within the
10414 aligned frame. Use it. */
10415 gcc_checking_assert (cfun->machine->fs.fp_valid);
10416 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10417 cfun->machine->fs.fp_offset - cfa_offset);
10418 mem = gen_rtx_MEM (mode, addr);
10419 add_reg_note (insn, REG_CFA_EXPRESSION,
10420 gen_rtx_SET (VOIDmode, mem, reg));
10424 /* The memory may not be relative to the current CFA register,
10425 which means that we may need to generate a new pattern for
10426 use by the unwind info. */
10427 else if (base != m->fs.cfa_reg)
10429 addr = plus_constant (Pmode, m->fs.cfa_reg,
10430 m->fs.cfa_offset - cfa_offset);
10431 mem = gen_rtx_MEM (mode, addr);
10432 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10436 /* Emit code to save registers using MOV insns.
10437 First register is stored at CFA - CFA_OFFSET. */
10438 static void
10439 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10441 unsigned int regno;
10443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10444 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10446 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10447 cfa_offset -= UNITS_PER_WORD;
10451 /* Emit code to save SSE registers using MOV insns.
10452 First register is stored at CFA - CFA_OFFSET. */
10453 static void
10454 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10456 unsigned int regno;
10458 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10459 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10461 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10462 cfa_offset -= 16;
10466 static GTY(()) rtx queued_cfa_restores;
10468 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10469 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10470 Don't add the note if the previously saved value will be left untouched
10471 within stack red-zone till return, as unwinders can find the same value
10472 in the register and on the stack. */
10474 static void
10475 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10477 if (!crtl->shrink_wrapped
10478 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10479 return;
10481 if (insn)
10483 add_reg_note (insn, REG_CFA_RESTORE, reg);
10484 RTX_FRAME_RELATED_P (insn) = 1;
10486 else
10487 queued_cfa_restores
10488 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10491 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
10493 static void
10494 ix86_add_queued_cfa_restore_notes (rtx insn)
10496 rtx last;
10497 if (!queued_cfa_restores)
10498 return;
10499 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10501 XEXP (last, 1) = REG_NOTES (insn);
10502 REG_NOTES (insn) = queued_cfa_restores;
10503 queued_cfa_restores = NULL_RTX;
10504 RTX_FRAME_RELATED_P (insn) = 1;
10507 /* Expand prologue or epilogue stack adjustment.
10508 The pattern exists to put a dependency on all ebp-based memory accesses.
10509 STYLE should be negative if instructions should be marked as frame related,
10510 zero if the %r11 register is live and cannot be freely used, and positive
10511 otherwise. */
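/* For example (an illustrative summary of the callers later in this file),
   the prologue passes STYLE as -1 so the adjustment is marked frame
   related, while the epilogue passes its own STYLE through so that queued
   REG_CFA_RESTORE notes can be attached to the adjustment.  */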
10513 static void
10514 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10515 int style, bool set_cfa)
10517 struct machine_function *m = cfun->machine;
10518 rtx insn;
10519 bool add_frame_related_expr = false;
10521 if (Pmode == SImode)
10522 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10523 else if (x86_64_immediate_operand (offset, DImode))
10524 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10525 else
10527 rtx tmp;
10528 /* r11 is used by indirect sibcall return as well, set before the
10529 epilogue and used after the epilogue. */
10530 if (style)
10531 tmp = gen_rtx_REG (DImode, R11_REG);
10532 else
10534 gcc_assert (src != hard_frame_pointer_rtx
10535 && dest != hard_frame_pointer_rtx);
10536 tmp = hard_frame_pointer_rtx;
10538 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10539 if (style < 0)
10540 add_frame_related_expr = true;
10542 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10545 insn = emit_insn (insn);
10546 if (style >= 0)
10547 ix86_add_queued_cfa_restore_notes (insn);
10549 if (set_cfa)
10551 rtx r;
10553 gcc_assert (m->fs.cfa_reg == src);
10554 m->fs.cfa_offset += INTVAL (offset);
10555 m->fs.cfa_reg = dest;
10557 r = gen_rtx_PLUS (Pmode, src, offset);
10558 r = gen_rtx_SET (VOIDmode, dest, r);
10559 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10560 RTX_FRAME_RELATED_P (insn) = 1;
10562 else if (style < 0)
10564 RTX_FRAME_RELATED_P (insn) = 1;
10565 if (add_frame_related_expr)
10567 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10568 r = gen_rtx_SET (VOIDmode, dest, r);
10569 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10573 if (dest == stack_pointer_rtx)
10575 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10576 bool valid = m->fs.sp_valid;
10578 if (src == hard_frame_pointer_rtx)
10580 valid = m->fs.fp_valid;
10581 ooffset = m->fs.fp_offset;
10583 else if (src == crtl->drap_reg)
10585 valid = m->fs.drap_valid;
10586 ooffset = 0;
10588 else
10590 /* Else there are two possibilities: SP itself, which we set
10591 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10592 taken care of by hand along the eh_return path. */
10593 gcc_checking_assert (src == stack_pointer_rtx
10594 || offset == const0_rtx);
10597 m->fs.sp_offset = ooffset - INTVAL (offset);
10598 m->fs.sp_valid = valid;
10602 /* Find an available register to be used as dynamic realign argument
10603 pointer register. Such a register will be written in the prologue and
10604 used at the beginning of the function body, so it must not be
10605 1. a parameter passing register.
10606 2. the GOT pointer.
10607 We reuse the static-chain register if it is available. Otherwise, we
10608 use DI for i386 and R13 for x86-64. We chose R13 since it has
10609 shorter encoding.
10611 Return: the regno of the chosen register. */
10613 static unsigned int
10614 find_drap_reg (void)
10616 tree decl = cfun->decl;
10618 if (TARGET_64BIT)
10620 /* Use R13 for a nested function or a function that needs a static chain.
10621 Since a function with a tail call may use any caller-saved
10622 registers in the epilogue, DRAP must not use a caller-saved
10623 register in that case. */
10624 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10625 return R13_REG;
10627 return R10_REG;
10629 else
10631 /* Use DI for a nested function or a function that needs a static chain.
10632 Since a function with a tail call may use any caller-saved
10633 registers in the epilogue, DRAP must not use a caller-saved
10634 register in that case. */
10635 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10636 return DI_REG;
10638 /* Reuse static chain register if it isn't used for parameter
10639 passing. */
10640 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10642 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10643 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10644 return CX_REG;
10646 return DI_REG;
10650 /* Return minimum incoming stack alignment. */
10652 static unsigned int
10653 ix86_minimum_incoming_stack_boundary (bool sibcall)
10655 unsigned int incoming_stack_boundary;
10657 /* Prefer the one specified at command line. */
10658 if (ix86_user_incoming_stack_boundary)
10659 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10660 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10661 when -mstackrealign is used, this isn't a sibcall check, and the
10662 estimated stack alignment is 128 bits. */
10663 else if (!sibcall
10664 && !TARGET_64BIT
10665 && ix86_force_align_arg_pointer
10666 && crtl->stack_alignment_estimated == 128)
10667 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10668 else
10669 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10671 /* Incoming stack alignment can be changed on individual functions
10672 via force_align_arg_pointer attribute. We use the smallest
10673 incoming stack boundary. */
10674 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10675 && lookup_attribute (ix86_force_align_arg_pointer_string,
10676 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10677 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10679 /* The incoming stack frame has to be aligned at least at
10680 parm_stack_boundary. */
10681 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10682 incoming_stack_boundary = crtl->parm_stack_boundary;
10684 /* The stack at the entry of main is aligned by the runtime. We use the
10685 smallest incoming stack boundary. */
10686 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10687 && DECL_NAME (current_function_decl)
10688 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10689 && DECL_FILE_SCOPE_P (current_function_decl))
10690 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10692 return incoming_stack_boundary;
10695 /* Update incoming stack boundary and estimated stack alignment. */
10697 static void
10698 ix86_update_stack_boundary (void)
10700 ix86_incoming_stack_boundary
10701 = ix86_minimum_incoming_stack_boundary (false);
10703 /* x86_64 varargs need 16-byte stack alignment for the register save
10704 area. */
10705 if (TARGET_64BIT
10706 && cfun->stdarg
10707 && crtl->stack_alignment_estimated < 128)
10708 crtl->stack_alignment_estimated = 128;
10711 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10712 needed or an rtx for DRAP otherwise. */
10714 static rtx
10715 ix86_get_drap_rtx (void)
10717 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10718 crtl->need_drap = true;
10720 if (stack_realign_drap)
10722 /* Assign DRAP to vDRAP and return vDRAP. */
10723 unsigned int regno = find_drap_reg ();
10724 rtx drap_vreg;
10725 rtx arg_ptr;
10726 rtx_insn *seq, *insn;
10728 arg_ptr = gen_rtx_REG (Pmode, regno);
10729 crtl->drap_reg = arg_ptr;
10731 start_sequence ();
10732 drap_vreg = copy_to_reg (arg_ptr);
10733 seq = get_insns ();
10734 end_sequence ();
10736 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10737 if (!optimize)
10739 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10740 RTX_FRAME_RELATED_P (insn) = 1;
10742 return drap_vreg;
10744 else
10745 return NULL;
10748 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10750 static rtx
10751 ix86_internal_arg_pointer (void)
10753 return virtual_incoming_args_rtx;
10756 struct scratch_reg {
10757 rtx reg;
10758 bool saved;
10761 /* Return a short-lived scratch register for use on function entry.
10762 In 32-bit mode, it is valid only after the registers are saved
10763 in the prologue. This register must be released by means of
10764 release_scratch_register_on_entry once it is dead. */
10766 static void
10767 get_scratch_register_on_entry (struct scratch_reg *sr)
10769 int regno;
10771 sr->saved = false;
10773 if (TARGET_64BIT)
10775 /* We always use R11 in 64-bit mode. */
10776 regno = R11_REG;
10778 else
10780 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10781 bool fastcall_p
10782 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10783 bool thiscall_p
10784 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10785 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10786 int regparm = ix86_function_regparm (fntype, decl);
10787 int drap_regno
10788 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10790 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10791 for the static chain register. */
10792 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10793 && drap_regno != AX_REG)
10794 regno = AX_REG;
10795 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10796 for the static chain register. */
10797 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10798 regno = AX_REG;
10799 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10800 regno = DX_REG;
10801 /* ecx is the static chain register. */
10802 else if (regparm < 3 && !fastcall_p && !thiscall_p
10803 && !static_chain_p
10804 && drap_regno != CX_REG)
10805 regno = CX_REG;
10806 else if (ix86_save_reg (BX_REG, true))
10807 regno = BX_REG;
10808 /* esi is the static chain register. */
10809 else if (!(regparm == 3 && static_chain_p)
10810 && ix86_save_reg (SI_REG, true))
10811 regno = SI_REG;
10812 else if (ix86_save_reg (DI_REG, true))
10813 regno = DI_REG;
10814 else
10816 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10817 sr->saved = true;
10821 sr->reg = gen_rtx_REG (Pmode, regno);
10822 if (sr->saved)
10824 rtx insn = emit_insn (gen_push (sr->reg));
10825 RTX_FRAME_RELATED_P (insn) = 1;
10829 /* Release a scratch register obtained from the preceding function. */
10831 static void
10832 release_scratch_register_on_entry (struct scratch_reg *sr)
10834 if (sr->saved)
10836 struct machine_function *m = cfun->machine;
10837 rtx x, insn = emit_insn (gen_pop (sr->reg));
10839 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10840 RTX_FRAME_RELATED_P (insn) = 1;
10841 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10842 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10844 m->fs.sp_offset -= UNITS_PER_WORD;
10848 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10850 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10852 static void
10853 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10855 /* We skip the probe for the first interval + a small dope of 4 words and
10856 probe that many bytes past the specified size to maintain a protection
10857 area at the bottom of the stack. */
10858 const int dope = 4 * UNITS_PER_WORD;
10859 rtx size_rtx = GEN_INT (size), last;
10861 /* See if we have a constant small number of probes to generate. If so,
10862 that's the easy case. The run-time loop is made up of 11 insns in the
10863 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10864 for n # of intervals. */
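      /* Illustrative arithmetic only: at the 5-interval cutoff just below,
	 the unrolled form costs 3 + 2*(5-1) = 11 insns, i.e. no more than
	 the run-time loop, which is why larger sizes fall through to it.  */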
10865 if (size <= 5 * PROBE_INTERVAL)
10867 HOST_WIDE_INT i, adjust;
10868 bool first_probe = true;
10870 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10871 values of N from 1 until it exceeds SIZE. If only one probe is
10872 needed, this will not generate any code. Then adjust and probe
10873 to PROBE_INTERVAL + SIZE. */
10874 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10876 if (first_probe)
10878 adjust = 2 * PROBE_INTERVAL + dope;
10879 first_probe = false;
10881 else
10882 adjust = PROBE_INTERVAL;
10884 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10885 plus_constant (Pmode, stack_pointer_rtx,
10886 -adjust)));
10887 emit_stack_probe (stack_pointer_rtx);
10890 if (first_probe)
10891 adjust = size + PROBE_INTERVAL + dope;
10892 else
10893 adjust = size + PROBE_INTERVAL - i;
10895 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10896 plus_constant (Pmode, stack_pointer_rtx,
10897 -adjust)));
10898 emit_stack_probe (stack_pointer_rtx);
10900 /* Adjust back to account for the additional first interval. */
10901 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10902 plus_constant (Pmode, stack_pointer_rtx,
10903 PROBE_INTERVAL + dope)));
10906 /* Otherwise, do the same as above, but in a loop. Note that we must be
10907 extra careful with variables wrapping around because we might be at
10908 the very top (or the very bottom) of the address space and we have
10909 to be able to handle this case properly; in particular, we use an
10910 equality test for the loop condition. */
10911 else
10913 HOST_WIDE_INT rounded_size;
10914 struct scratch_reg sr;
10916 get_scratch_register_on_entry (&sr);
10919 /* Step 1: round SIZE to the previous multiple of the interval. */
10921 rounded_size = size & -PROBE_INTERVAL;
10924 /* Step 2: compute initial and final value of the loop counter. */
10926 /* SP = SP_0 + PROBE_INTERVAL. */
10927 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10928 plus_constant (Pmode, stack_pointer_rtx,
10929 - (PROBE_INTERVAL + dope))));
10931 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10932 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10933 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10934 gen_rtx_PLUS (Pmode, sr.reg,
10935 stack_pointer_rtx)));
10938 /* Step 3: the loop
10940 while (SP != LAST_ADDR)
10942 SP = SP + PROBE_INTERVAL
10943 probe at SP
10946 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10947 values of N from 1 until it is equal to ROUNDED_SIZE. */
10949 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
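      /* The whole loop above is emitted as a single insn here; its
	 assembly form is produced by output_adjust_stack_and_probe
	 further down in this file (see the sketch there).  */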
10952 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10953 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10955 if (size != rounded_size)
10957 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10958 plus_constant (Pmode, stack_pointer_rtx,
10959 rounded_size - size)));
10960 emit_stack_probe (stack_pointer_rtx);
10963 /* Adjust back to account for the additional first interval. */
10964 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10965 plus_constant (Pmode, stack_pointer_rtx,
10966 PROBE_INTERVAL + dope)));
10968 release_scratch_register_on_entry (&sr);
10971 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10973 /* Even if the stack pointer isn't the CFA register, we need to correctly
10974 describe the adjustments made to it, in particular differentiate the
10975 frame-related ones from the frame-unrelated ones. */
10976 if (size > 0)
10978 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10979 XVECEXP (expr, 0, 0)
10980 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10981 plus_constant (Pmode, stack_pointer_rtx, -size));
10982 XVECEXP (expr, 0, 1)
10983 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10984 plus_constant (Pmode, stack_pointer_rtx,
10985 PROBE_INTERVAL + dope + size));
10986 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10987 RTX_FRAME_RELATED_P (last) = 1;
10989 cfun->machine->fs.sp_offset += size;
10992 /* Make sure nothing is scheduled before we are done. */
10993 emit_insn (gen_blockage ());
10996 /* Adjust the stack pointer up to REG while probing it. */
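/* A rough sketch of the sequence emitted below, in 32-bit AT&T syntax with
   illustrative label names (the exact registers and operand sizes depend
   on the mode):

	.LPSRL0:	cmp	%reg, %esp
			je	.LPSRE0
			sub	$PROBE_INTERVAL, %esp
			or	$0, (%esp)
			jmp	.LPSRL0
	.LPSRE0:  */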
10998 const char *
10999 output_adjust_stack_and_probe (rtx reg)
11001 static int labelno = 0;
11002 char loop_lab[32], end_lab[32];
11003 rtx xops[2];
11005 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11006 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11008 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11010 /* Jump to END_LAB if SP == LAST_ADDR. */
11011 xops[0] = stack_pointer_rtx;
11012 xops[1] = reg;
11013 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11014 fputs ("\tje\t", asm_out_file);
11015 assemble_name_raw (asm_out_file, end_lab);
11016 fputc ('\n', asm_out_file);
11018 /* SP = SP + PROBE_INTERVAL. */
11019 xops[1] = GEN_INT (PROBE_INTERVAL);
11020 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11022 /* Probe at SP. */
11023 xops[1] = const0_rtx;
11024 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11026 fprintf (asm_out_file, "\tjmp\t");
11027 assemble_name_raw (asm_out_file, loop_lab);
11028 fputc ('\n', asm_out_file);
11030 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11032 return "";
11035 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11036 inclusive. These are offsets from the current stack pointer. */
11038 static void
11039 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11041 /* See if we have a constant small number of probes to generate. If so,
11042 that's the easy case. The run-time loop is made up of 7 insns in the
11043 generic case while the compile-time loop is made up of n insns for n #
11044 of intervals. */
11045 if (size <= 7 * PROBE_INTERVAL)
11047 HOST_WIDE_INT i;
11049 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11050 it exceeds SIZE. If only one probe is needed, this will not
11051 generate any code. Then probe at FIRST + SIZE. */
11052 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11053 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11054 -(first + i)));
11056 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11057 -(first + size)));
11060 /* Otherwise, do the same as above, but in a loop. Note that we must be
11061 extra careful with variables wrapping around because we might be at
11062 the very top (or the very bottom) of the address space and we have
11063 to be able to handle this case properly; in particular, we use an
11064 equality test for the loop condition. */
11065 else
11067 HOST_WIDE_INT rounded_size, last;
11068 struct scratch_reg sr;
11070 get_scratch_register_on_entry (&sr);
11073 /* Step 1: round SIZE to the previous multiple of the interval. */
11075 rounded_size = size & -PROBE_INTERVAL;
11078 /* Step 2: compute initial and final value of the loop counter. */
11080 /* TEST_OFFSET = FIRST. */
11081 emit_move_insn (sr.reg, GEN_INT (-first));
11083 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11084 last = first + rounded_size;
11087 /* Step 3: the loop
11089 while (TEST_ADDR != LAST_ADDR)
11091 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11092 probe at TEST_ADDR
11095 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11096 until it is equal to ROUNDED_SIZE. */
11098 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
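      /* As above, the loop is emitted as a single insn whose assembly
	 form comes from output_probe_stack_range further down.  */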
11101 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11102 that SIZE is equal to ROUNDED_SIZE. */
11104 if (size != rounded_size)
11105 emit_stack_probe (plus_constant (Pmode,
11106 gen_rtx_PLUS (Pmode,
11107 stack_pointer_rtx,
11108 sr.reg),
11109 rounded_size - size));
11111 release_scratch_register_on_entry (&sr);
11114 /* Make sure nothing is scheduled before we are done. */
11115 emit_insn (gen_blockage ());
11118 /* Probe a range of stack addresses from REG to END, inclusive. These are
11119 offsets from the current stack pointer. */
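/* Roughly, the sequence emitted below looks like this in 32-bit AT&T
   syntax (illustrative label names; REG holds a negated offset from the
   stack pointer):

	.LPSRL1:	cmp	%end, %reg
			je	.LPSRE1
			sub	$PROBE_INTERVAL, %reg
			or	$0, (%esp,%reg)
			jmp	.LPSRL1
	.LPSRE1:  */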
11121 const char *
11122 output_probe_stack_range (rtx reg, rtx end)
11124 static int labelno = 0;
11125 char loop_lab[32], end_lab[32];
11126 rtx xops[3];
11128 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11129 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11131 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11133 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11134 xops[0] = reg;
11135 xops[1] = end;
11136 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11137 fputs ("\tje\t", asm_out_file);
11138 assemble_name_raw (asm_out_file, end_lab);
11139 fputc ('\n', asm_out_file);
11141 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11142 xops[1] = GEN_INT (PROBE_INTERVAL);
11143 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11145 /* Probe at TEST_ADDR. */
11146 xops[0] = stack_pointer_rtx;
11147 xops[1] = reg;
11148 xops[2] = const0_rtx;
11149 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11151 fprintf (asm_out_file, "\tjmp\t");
11152 assemble_name_raw (asm_out_file, loop_lab);
11153 fputc ('\n', asm_out_file);
11155 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11157 return "";
11160 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11161 to be generated in the correct form. */
11162 static void
11163 ix86_finalize_stack_realign_flags (void)
11165 /* Check whether stack realignment is really needed after reload, and
11166 store the result in cfun. */
11167 unsigned int incoming_stack_boundary
11168 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11169 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11170 unsigned int stack_realign = (incoming_stack_boundary
11171 < (crtl->is_leaf
11172 ? crtl->max_used_stack_slot_alignment
11173 : crtl->stack_alignment_needed));
11175 if (crtl->stack_realign_finalized)
11177 /* After stack_realign_needed is finalized, we can no longer
11178 change it. */
11179 gcc_assert (crtl->stack_realign_needed == stack_realign);
11180 return;
11183 /* If the only reason for frame_pointer_needed is that we conservatively
11184 assumed stack realignment might be needed, but in the end nothing that
11185 needed the stack alignment had been spilled, clear frame_pointer_needed
11186 and say we don't need stack realignment. */
11187 if (stack_realign
11188 && frame_pointer_needed
11189 && crtl->is_leaf
11190 && flag_omit_frame_pointer
11191 && crtl->sp_is_unchanging
11192 && !ix86_current_function_calls_tls_descriptor
11193 && !crtl->accesses_prior_frames
11194 && !cfun->calls_alloca
11195 && !crtl->calls_eh_return
11196 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11197 && !ix86_frame_pointer_required ()
11198 && get_frame_size () == 0
11199 && ix86_nsaved_sseregs () == 0
11200 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11202 HARD_REG_SET set_up_by_prologue, prologue_used;
11203 basic_block bb;
11205 CLEAR_HARD_REG_SET (prologue_used);
11206 CLEAR_HARD_REG_SET (set_up_by_prologue);
11207 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11208 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11209 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11210 HARD_FRAME_POINTER_REGNUM);
11211 FOR_EACH_BB_FN (bb, cfun)
11213 rtx_insn *insn;
11214 FOR_BB_INSNS (bb, insn)
11215 if (NONDEBUG_INSN_P (insn)
11216 && requires_stack_frame_p (insn, prologue_used,
11217 set_up_by_prologue))
11219 crtl->stack_realign_needed = stack_realign;
11220 crtl->stack_realign_finalized = true;
11221 return;
11225 /* If drap has been set, but it actually isn't live at the start
11226 of the function, there is no reason to set it up. */
11227 if (crtl->drap_reg)
11229 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11230 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11232 crtl->drap_reg = NULL_RTX;
11233 crtl->need_drap = false;
11236 else
11237 cfun->machine->no_drap_save_restore = true;
11239 frame_pointer_needed = false;
11240 stack_realign = false;
11241 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11242 crtl->stack_alignment_needed = incoming_stack_boundary;
11243 crtl->stack_alignment_estimated = incoming_stack_boundary;
11244 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11245 crtl->preferred_stack_boundary = incoming_stack_boundary;
11246 df_finish_pass (true);
11247 df_scan_alloc (NULL);
11248 df_scan_blocks ();
11249 df_compute_regs_ever_live (true);
11250 df_analyze ();
11253 crtl->stack_realign_needed = stack_realign;
11254 crtl->stack_realign_finalized = true;
11257 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11259 static void
11260 ix86_elim_entry_set_got (rtx reg)
11262 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11263 rtx_insn *c_insn = BB_HEAD (bb);
11264 if (!NONDEBUG_INSN_P (c_insn))
11265 c_insn = next_nonnote_nondebug_insn (c_insn);
11266 if (c_insn && NONJUMP_INSN_P (c_insn))
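      /* The SET_GOT insn is wrapped in a PARALLEL; look at its first
	 element, which must be a SET from an UNSPEC_SET_GOT to REG for
	 the insn to be deleted.  */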
11268 rtx pat = PATTERN (c_insn);
11269 if (GET_CODE (pat) == PARALLEL)
11271 rtx vec = XVECEXP (pat, 0, 0);
11272 if (GET_CODE (vec) == SET
11273 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11274 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11275 delete_insn (c_insn);
11280 /* Expand the prologue into a bunch of separate insns. */
11282 void
11283 ix86_expand_prologue (void)
11285 struct machine_function *m = cfun->machine;
11286 rtx insn, t;
11287 struct ix86_frame frame;
11288 HOST_WIDE_INT allocate;
11289 bool int_registers_saved;
11290 bool sse_registers_saved;
11292 ix86_finalize_stack_realign_flags ();
11294 /* DRAP should not coexist with stack_realign_fp */
11295 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11297 memset (&m->fs, 0, sizeof (m->fs));
11299 /* Initialize CFA state for before the prologue. */
11300 m->fs.cfa_reg = stack_pointer_rtx;
11301 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11303 /* Track SP offset to the CFA. We continue tracking this after we've
11304 swapped the CFA register away from SP. In the case of re-alignment
11305 this is fudged; we're interested in offsets within the local frame. */
11306 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11307 m->fs.sp_valid = true;
11309 ix86_compute_frame_layout (&frame);
11311 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11313 /* We should have already generated an error for any use of
11314 ms_hook on a nested function. */
11315 gcc_checking_assert (!ix86_static_chain_on_stack);
11317 /* Check whether profiling is active and we should use the profile-before-
11318 prologue variant. If so, issue a sorry. */
11319 if (crtl->profile && flag_fentry != 0)
11320 sorry ("ms_hook_prologue attribute isn%'t compatible "
11321 "with -mfentry for 32-bit");
11323 /* In ix86_asm_output_function_label we emitted:
11324 8b ff movl.s %edi,%edi
11325 55 push %ebp
11326 8b ec movl.s %esp,%ebp
11328 This matches the hookable function prologue in Win32 API
11329 functions in Microsoft Windows XP Service Pack 2 and newer.
11330 Wine uses this to enable Windows apps to hook the Win32 API
11331 functions provided by Wine.
11333 What that means is that we've already set up the frame pointer. */
11335 if (frame_pointer_needed
11336 && !(crtl->drap_reg && crtl->stack_realign_needed))
11338 rtx push, mov;
11340 /* We've decided to use the frame pointer already set up.
11341 Describe this to the unwinder by pretending that both
11342 push and mov insns happen right here.
11344 Putting the unwind info here at the end of the ms_hook
11345 is done so that we can make absolutely certain we get
11346 the required byte sequence at the start of the function,
11347 rather than relying on an assembler that can produce
11348 the exact encoding required.
11350 However it does mean (in the unpatched case) that we have
11351 a 1 insn window where the asynchronous unwind info is
11352 incorrect. However, if we placed the unwind info at
11353 its correct location we would have incorrect unwind info
11354 in the patched case. Which is probably all moot since
11355 I don't expect Wine generates dwarf2 unwind info for the
11356 system libraries that use this feature. */
11358 insn = emit_insn (gen_blockage ());
11360 push = gen_push (hard_frame_pointer_rtx);
11361 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11362 stack_pointer_rtx);
11363 RTX_FRAME_RELATED_P (push) = 1;
11364 RTX_FRAME_RELATED_P (mov) = 1;
11366 RTX_FRAME_RELATED_P (insn) = 1;
11367 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11368 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11370 /* Note that gen_push incremented m->fs.cfa_offset, even
11371 though we didn't emit the push insn here. */
11372 m->fs.cfa_reg = hard_frame_pointer_rtx;
11373 m->fs.fp_offset = m->fs.cfa_offset;
11374 m->fs.fp_valid = true;
11376 else
11378 /* The frame pointer is not needed so pop %ebp again.
11379 This leaves us with a pristine state. */
11380 emit_insn (gen_pop (hard_frame_pointer_rtx));
11384 /* The first insn of a function that accepts its static chain on the
11385 stack is to push the register that would be filled in by a direct
11386 call. This insn will be skipped by the trampoline. */
11387 else if (ix86_static_chain_on_stack)
11389 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11390 emit_insn (gen_blockage ());
11392 /* We don't want to interpret this push insn as a register save,
11393 only as a stack adjustment. The real copy of the register as
11394 a save will be done later, if needed. */
11395 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11396 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11397 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11398 RTX_FRAME_RELATED_P (insn) = 1;
11401 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11402 DRAP is needed and stack realignment is really needed after reload. */
11403 if (stack_realign_drap)
11405 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11407 /* Only need to push parameter pointer reg if it is caller saved. */
11408 if (!call_used_regs[REGNO (crtl->drap_reg)])
11410 /* Push arg pointer reg */
11411 insn = emit_insn (gen_push (crtl->drap_reg));
11412 RTX_FRAME_RELATED_P (insn) = 1;
11415 /* Grab the argument pointer. */
11416 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11417 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11418 RTX_FRAME_RELATED_P (insn) = 1;
11419 m->fs.cfa_reg = crtl->drap_reg;
11420 m->fs.cfa_offset = 0;
11422 /* Align the stack. */
11423 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11424 stack_pointer_rtx,
11425 GEN_INT (-align_bytes)));
11426 RTX_FRAME_RELATED_P (insn) = 1;
11428 /* Replicate the return address on the stack so that the return
11429 address can be reached via the (argp - 1) slot. This is needed
11430 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11431 expand_builtin_return_addr etc. */
11432 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11433 t = gen_frame_mem (word_mode, t);
11434 insn = emit_insn (gen_push (t));
11435 RTX_FRAME_RELATED_P (insn) = 1;
11437 /* For the purposes of frame and register save area addressing,
11438 we've started over with a new frame. */
11439 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11440 m->fs.realigned = true;
11443 int_registers_saved = (frame.nregs == 0);
11444 sse_registers_saved = (frame.nsseregs == 0);
11446 if (frame_pointer_needed && !m->fs.fp_valid)
11448 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11449 slower on all targets. Also sdb doesn't like it. */
11450 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11451 RTX_FRAME_RELATED_P (insn) = 1;
11453 /* Push registers now, before setting the frame pointer
11454 on SEH target. */
11455 if (!int_registers_saved
11456 && TARGET_SEH
11457 && !frame.save_regs_using_mov)
11459 ix86_emit_save_regs ();
11460 int_registers_saved = true;
11461 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11464 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11466 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11467 RTX_FRAME_RELATED_P (insn) = 1;
11469 if (m->fs.cfa_reg == stack_pointer_rtx)
11470 m->fs.cfa_reg = hard_frame_pointer_rtx;
11471 m->fs.fp_offset = m->fs.sp_offset;
11472 m->fs.fp_valid = true;
11476 if (!int_registers_saved)
11478 /* If saving registers via PUSH, do so now. */
11479 if (!frame.save_regs_using_mov)
11481 ix86_emit_save_regs ();
11482 int_registers_saved = true;
11483 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11486 /* When using the red zone we may start register saving before allocating
11487 the stack frame, saving one cycle of the prologue. However, avoid
11488 doing this if we have to probe the stack; at least on x86_64 the
11489 stack probe can turn into a call that clobbers a red zone location. */
11490 else if (ix86_using_red_zone ()
11491 && (! TARGET_STACK_PROBE
11492 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11494 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11495 int_registers_saved = true;
11499 if (stack_realign_fp)
11501 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11502 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11504 /* The computation of the size of the re-aligned stack frame means
11505 that we must allocate the size of the register save area before
11506 performing the actual alignment. Otherwise we cannot guarantee
11507 that there's enough storage above the realignment point. */
11508 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11509 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11510 GEN_INT (m->fs.sp_offset
11511 - frame.sse_reg_save_offset),
11512 -1, false);
11514 /* Align the stack. */
11515 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11516 stack_pointer_rtx,
11517 GEN_INT (-align_bytes)));
11519 /* For the purposes of register save area addressing, the stack
11520 pointer is no longer valid. As for the value of sp_offset,
11521 see ix86_compute_frame_layout, which we need to match in order
11522 to pass verification of stack_pointer_offset at the end. */
11523 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11524 m->fs.sp_valid = false;
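      /* Illustrative arithmetic for the rounding just above: with
	 align_bytes of 16, an sp_offset of 12 becomes 16 and an
	 sp_offset of 16 becomes 32.  */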
11527 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11529 if (flag_stack_usage_info)
11531 /* We start to count from ARG_POINTER. */
11532 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11534 /* If it was realigned, take into account the fake frame. */
11535 if (stack_realign_drap)
11537 if (ix86_static_chain_on_stack)
11538 stack_size += UNITS_PER_WORD;
11540 if (!call_used_regs[REGNO (crtl->drap_reg)])
11541 stack_size += UNITS_PER_WORD;
11543 /* This over-estimates by 1 minimal-stack-alignment-unit but
11544 mitigates that by counting in the new return address slot. */
11545 current_function_dynamic_stack_size
11546 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11549 current_function_static_stack_size = stack_size;
11552 /* On SEH target with very large frame size, allocate an area to save
11553 SSE registers (as the very large allocation won't be described). */
11554 if (TARGET_SEH
11555 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11556 && !sse_registers_saved)
11558 HOST_WIDE_INT sse_size =
11559 frame.sse_reg_save_offset - frame.reg_save_offset;
11561 gcc_assert (int_registers_saved);
11563 /* No need to do stack checking as the area will be immediately
11564 written. */
11565 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11566 GEN_INT (-sse_size), -1,
11567 m->fs.cfa_reg == stack_pointer_rtx);
11568 allocate -= sse_size;
11569 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11570 sse_registers_saved = true;
11573 /* The stack has already been decremented by the instruction calling us
11574 so probe if the size is non-negative to preserve the protection area. */
11575 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11577 /* We expect the registers to be saved when probes are used. */
11578 gcc_assert (int_registers_saved);
11580 if (STACK_CHECK_MOVING_SP)
11582 if (!(crtl->is_leaf && !cfun->calls_alloca
11583 && allocate <= PROBE_INTERVAL))
11585 ix86_adjust_stack_and_probe (allocate);
11586 allocate = 0;
11589 else
11591 HOST_WIDE_INT size = allocate;
11593 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11594 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11596 if (TARGET_STACK_PROBE)
11598 if (crtl->is_leaf && !cfun->calls_alloca)
11600 if (size > PROBE_INTERVAL)
11601 ix86_emit_probe_stack_range (0, size);
11603 else
11604 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11606 else
11608 if (crtl->is_leaf && !cfun->calls_alloca)
11610 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11611 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11612 size - STACK_CHECK_PROTECT);
11614 else
11615 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11620 if (allocate == 0)
11622 else if (!ix86_target_stack_probe ()
11623 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11625 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11626 GEN_INT (-allocate), -1,
11627 m->fs.cfa_reg == stack_pointer_rtx);
11629 else
11631 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11632 rtx r10 = NULL;
11633 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11634 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11635 bool eax_live = ix86_eax_live_at_start_p ();
11636 bool r10_live = false;
11638 if (TARGET_64BIT)
11639 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11641 if (eax_live)
11643 insn = emit_insn (gen_push (eax));
11644 allocate -= UNITS_PER_WORD;
11645 /* Note that SEH directives need to continue tracking the stack
11646 pointer even after the frame pointer has been set up. */
11647 if (sp_is_cfa_reg || TARGET_SEH)
11649 if (sp_is_cfa_reg)
11650 m->fs.cfa_offset += UNITS_PER_WORD;
11651 RTX_FRAME_RELATED_P (insn) = 1;
11652 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11653 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11654 plus_constant (Pmode, stack_pointer_rtx,
11655 -UNITS_PER_WORD)));
11659 if (r10_live)
11661 r10 = gen_rtx_REG (Pmode, R10_REG);
11662 insn = emit_insn (gen_push (r10));
11663 allocate -= UNITS_PER_WORD;
11664 if (sp_is_cfa_reg || TARGET_SEH)
11666 if (sp_is_cfa_reg)
11667 m->fs.cfa_offset += UNITS_PER_WORD;
11668 RTX_FRAME_RELATED_P (insn) = 1;
11669 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11670 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11671 plus_constant (Pmode, stack_pointer_rtx,
11672 -UNITS_PER_WORD)));
11676 emit_move_insn (eax, GEN_INT (allocate));
11677 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11679 /* Use the fact that AX still contains ALLOCATE. */
11680 adjust_stack_insn = (Pmode == DImode
11681 ? gen_pro_epilogue_adjust_stack_di_sub
11682 : gen_pro_epilogue_adjust_stack_si_sub);
11684 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11685 stack_pointer_rtx, eax));
11687 if (sp_is_cfa_reg || TARGET_SEH)
11689 if (sp_is_cfa_reg)
11690 m->fs.cfa_offset += allocate;
11691 RTX_FRAME_RELATED_P (insn) = 1;
11692 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11693 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11694 plus_constant (Pmode, stack_pointer_rtx,
11695 -allocate)));
11697 m->fs.sp_offset += allocate;
11699 /* Use stack_pointer_rtx for relative addressing so that code
11700 works for realigned stack, too. */
11701 if (r10_live && eax_live)
11703 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11704 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11705 gen_frame_mem (word_mode, t));
11706 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11707 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11708 gen_frame_mem (word_mode, t));
11710 else if (eax_live || r10_live)
11712 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11713 emit_move_insn (gen_rtx_REG (word_mode,
11714 (eax_live ? AX_REG : R10_REG)),
11715 gen_frame_mem (word_mode, t));
11718 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11720 /* If we haven't already set up the frame pointer, do so now. */
11721 if (frame_pointer_needed && !m->fs.fp_valid)
11723 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11724 GEN_INT (frame.stack_pointer_offset
11725 - frame.hard_frame_pointer_offset));
11726 insn = emit_insn (insn);
11727 RTX_FRAME_RELATED_P (insn) = 1;
11728 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11730 if (m->fs.cfa_reg == stack_pointer_rtx)
11731 m->fs.cfa_reg = hard_frame_pointer_rtx;
11732 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11733 m->fs.fp_valid = true;
11736 if (!int_registers_saved)
11737 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11738 if (!sse_registers_saved)
11739 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11741 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11742 in PROLOGUE. */
11743 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11745 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11746 insn = emit_insn (gen_set_got (pic));
11747 RTX_FRAME_RELATED_P (insn) = 1;
11748 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11749 emit_insn (gen_prologue_use (pic));
11750 /* Delete an already emitted SET_GOT if it exists and is allocated to
11751 REAL_PIC_OFFSET_TABLE_REGNUM. */
11752 ix86_elim_entry_set_got (pic);
11755 if (crtl->drap_reg && !crtl->stack_realign_needed)
11757 /* vDRAP is set up, but after reload it turns out stack realignment
11758 isn't necessary; here we emit prologue code to set up DRAP
11759 without the stack realignment adjustment. */
11760 t = choose_baseaddr (0);
11761 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11764 /* Prevent instructions from being scheduled into register save push
11765 sequence when access to the redzone area is done through frame pointer.
11766 The offset between the frame pointer and the stack pointer is calculated
11767 relative to the value of the stack pointer at the end of the function
11768 prologue, and moving instructions that access redzone area via frame
11769 pointer inside push sequence violates this assumption. */
11770 if (frame_pointer_needed && frame.red_zone_size)
11771 emit_insn (gen_memory_blockage ());
11773 /* Emit cld instruction if stringops are used in the function. */
11774 if (TARGET_CLD && ix86_current_function_needs_cld)
11775 emit_insn (gen_cld ());
11777 /* SEH requires that the prologue end within 256 bytes of the start of
11778 the function. Prevent instruction schedules that would extend that.
11779 Further, prevent alloca modifications to the stack pointer from being
11780 combined with prologue modifications. */
11781 if (TARGET_SEH)
11782 emit_insn (gen_prologue_use (stack_pointer_rtx));
11785 /* Emit code to restore REG using a POP insn. */
11787 static void
11788 ix86_emit_restore_reg_using_pop (rtx reg)
11790 struct machine_function *m = cfun->machine;
11791 rtx insn = emit_insn (gen_pop (reg));
11793 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11794 m->fs.sp_offset -= UNITS_PER_WORD;
11796 if (m->fs.cfa_reg == crtl->drap_reg
11797 && REGNO (reg) == REGNO (crtl->drap_reg))
11799 /* Previously we'd represented the CFA as an expression
11800 like *(%ebp - 8). We've just popped that value from
11801 the stack, which means we need to reset the CFA to
11802 the drap register. This will remain until we restore
11803 the stack pointer. */
11804 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11805 RTX_FRAME_RELATED_P (insn) = 1;
11807 /* This means that the DRAP register is valid for addressing too. */
11808 m->fs.drap_valid = true;
11809 return;
11812 if (m->fs.cfa_reg == stack_pointer_rtx)
11814 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11815 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11816 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11817 RTX_FRAME_RELATED_P (insn) = 1;
11819 m->fs.cfa_offset -= UNITS_PER_WORD;
11822 /* When the frame pointer is the CFA, and we pop it, we are
11823 swapping back to the stack pointer as the CFA. This happens
11824 for stack frames that don't allocate other data, so we assume
11825 the stack pointer is now pointing at the return address, i.e.
11826 the function entry state, which makes the offset one word. */
11827 if (reg == hard_frame_pointer_rtx)
11829 m->fs.fp_valid = false;
11830 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11832 m->fs.cfa_reg = stack_pointer_rtx;
11833 m->fs.cfa_offset -= UNITS_PER_WORD;
11835 add_reg_note (insn, REG_CFA_DEF_CFA,
11836 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11837 GEN_INT (m->fs.cfa_offset)));
11838 RTX_FRAME_RELATED_P (insn) = 1;
11843 /* Emit code to restore saved registers using POP insns. */
11845 static void
11846 ix86_emit_restore_regs_using_pop (void)
11848 unsigned int regno;
11850 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11851 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11852 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11855 /* Emit code and notes for the LEAVE instruction. */
11857 static void
11858 ix86_emit_leave (void)
11860 struct machine_function *m = cfun->machine;
11861 rtx insn = emit_insn (ix86_gen_leave ());
11863 ix86_add_queued_cfa_restore_notes (insn);
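  /* Illustrative note: "leave" is equivalent to "mov %ebp, %esp" followed
     by "pop %ebp", which is why the stack pointer becomes valid again one
     word below the old frame pointer location, as recorded next.  */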
11865 gcc_assert (m->fs.fp_valid);
11866 m->fs.sp_valid = true;
11867 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11868 m->fs.fp_valid = false;
11870 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11872 m->fs.cfa_reg = stack_pointer_rtx;
11873 m->fs.cfa_offset = m->fs.sp_offset;
11875 add_reg_note (insn, REG_CFA_DEF_CFA,
11876 plus_constant (Pmode, stack_pointer_rtx,
11877 m->fs.sp_offset));
11878 RTX_FRAME_RELATED_P (insn) = 1;
11880 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11881 m->fs.fp_offset);
11884 /* Emit code to restore saved registers using MOV insns.
11885 First register is restored from CFA - CFA_OFFSET. */
11886 static void
11887 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11888 bool maybe_eh_return)
11890 struct machine_function *m = cfun->machine;
11891 unsigned int regno;
11893 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11894 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11896 rtx reg = gen_rtx_REG (word_mode, regno);
11897 rtx insn, mem;
11899 mem = choose_baseaddr (cfa_offset);
11900 mem = gen_frame_mem (word_mode, mem);
11901 insn = emit_move_insn (reg, mem);
11903 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11905 /* Previously we'd represented the CFA as an expression
11906 like *(%ebp - 8). We've just popped that value from
11907 the stack, which means we need to reset the CFA to
11908 the drap register. This will remain until we restore
11909 the stack pointer. */
11910 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11911 RTX_FRAME_RELATED_P (insn) = 1;
11913 /* This means that the DRAP register is valid for addressing. */
11914 m->fs.drap_valid = true;
11916 else
11917 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11919 cfa_offset -= UNITS_PER_WORD;
11923 /* Emit code to restore saved SSE registers using MOV insns.
11924 First register is restored from CFA - CFA_OFFSET. */
11925 static void
11926 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11927 bool maybe_eh_return)
11929 unsigned int regno;
11931 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11932 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11934 rtx reg = gen_rtx_REG (V4SFmode, regno);
11935 rtx mem;
11937 mem = choose_baseaddr (cfa_offset);
11938 mem = gen_rtx_MEM (V4SFmode, mem);
11939 set_mem_align (mem, 128);
11940 emit_move_insn (reg, mem);
11942 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11944 cfa_offset -= 16;
11948 /* Restore function stack, frame, and registers. */
11950 void
11951 ix86_expand_epilogue (int style)
11953 struct machine_function *m = cfun->machine;
11954 struct machine_frame_state frame_state_save = m->fs;
11955 struct ix86_frame frame;
11956 bool restore_regs_via_mov;
11957 bool using_drap;
11959 ix86_finalize_stack_realign_flags ();
11960 ix86_compute_frame_layout (&frame);
11962 m->fs.sp_valid = (!frame_pointer_needed
11963 || (crtl->sp_is_unchanging
11964 && !stack_realign_fp));
11965 gcc_assert (!m->fs.sp_valid
11966 || m->fs.sp_offset == frame.stack_pointer_offset);
11968 /* The FP must be valid if the frame pointer is present. */
11969 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11970 gcc_assert (!m->fs.fp_valid
11971 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11973 /* We must have *some* valid pointer to the stack frame. */
11974 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11976 /* The DRAP is never valid at this point. */
11977 gcc_assert (!m->fs.drap_valid);
11979 /* See the comment about red zone and frame
11980 pointer usage in ix86_expand_prologue. */
11981 if (frame_pointer_needed && frame.red_zone_size)
11982 emit_insn (gen_memory_blockage ());
11984 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11985 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11987 /* Determine the CFA offset of the end of the red-zone. */
11988 m->fs.red_zone_offset = 0;
11989 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11991 /* The red-zone begins below the return address. */
11992 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11994 /* When the register save area is in the aligned portion of
11995 the stack, determine the maximum runtime displacement that
11996 matches up with the aligned frame. */
11997 if (stack_realign_drap)
11998 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11999 + UNITS_PER_WORD);
12002 /* Special care must be taken for the normal return case of a function
12003 using eh_return: the eax and edx registers are marked as saved, but
12004 not restored along this path. Adjust the save location to match. */
12005 if (crtl->calls_eh_return && style != 2)
12006 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12008 /* EH_RETURN requires the use of moves to function properly. */
12009 if (crtl->calls_eh_return)
12010 restore_regs_via_mov = true;
12011 /* SEH requires the use of pops to identify the epilogue. */
12012 else if (TARGET_SEH)
12013 restore_regs_via_mov = false;
12014 /* If we're only restoring one register and sp is not valid then
12015 use a move instruction to restore the register, since it's
12016 less work than reloading sp and popping the register. */
12017 else if (!m->fs.sp_valid && frame.nregs <= 1)
12018 restore_regs_via_mov = true;
12019 else if (TARGET_EPILOGUE_USING_MOVE
12020 && cfun->machine->use_fast_prologue_epilogue
12021 && (frame.nregs > 1
12022 || m->fs.sp_offset != frame.reg_save_offset))
12023 restore_regs_via_mov = true;
12024 else if (frame_pointer_needed
12025 && !frame.nregs
12026 && m->fs.sp_offset != frame.reg_save_offset)
12027 restore_regs_via_mov = true;
12028 else if (frame_pointer_needed
12029 && TARGET_USE_LEAVE
12030 && cfun->machine->use_fast_prologue_epilogue
12031 && frame.nregs == 1)
12032 restore_regs_via_mov = true;
12033 else
12034 restore_regs_via_mov = false;
12036 if (restore_regs_via_mov || frame.nsseregs)
12038 /* Ensure that the entire register save area is addressable via
12039 the stack pointer, if we will restore via sp. */
12040 if (TARGET_64BIT
12041 && m->fs.sp_offset > 0x7fffffff
12042 && !(m->fs.fp_valid || m->fs.drap_valid)
12043 && (frame.nsseregs + frame.nregs) != 0)
12045 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12046 GEN_INT (m->fs.sp_offset
12047 - frame.sse_reg_save_offset),
12048 style,
12049 m->fs.cfa_reg == stack_pointer_rtx);
12053 /* If there are any SSE registers to restore, then we have to do it
12054 via moves, since there's obviously no pop for SSE regs. */
12055 if (frame.nsseregs)
12056 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12057 style == 2);
12059 if (restore_regs_via_mov)
12061 rtx t;
12063 if (frame.nregs)
12064 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12066 /* eh_return epilogues need %ecx added to the stack pointer. */
12067 if (style == 2)
12069 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12071 /* Stack align doesn't work with eh_return. */
12072 gcc_assert (!stack_realign_drap);
12074 /* Neither do regparm nested functions. */
12074 gcc_assert (!ix86_static_chain_on_stack);
12076 if (frame_pointer_needed)
12078 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12079 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12080 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12082 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12083 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12085 /* Note that we use SA as a temporary CFA, as the return
12086 address is at the proper place relative to it. We
12087 pretend this happens at the FP restore insn because
12088 prior to this insn the FP would be stored at the wrong
12089 offset relative to SA, and after this insn we have no
12090 other reasonable register to use for the CFA. We don't
12091 bother resetting the CFA to the SP for the duration of
12092 the return insn. */
12093 add_reg_note (insn, REG_CFA_DEF_CFA,
12094 plus_constant (Pmode, sa, UNITS_PER_WORD));
12095 ix86_add_queued_cfa_restore_notes (insn);
12096 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12097 RTX_FRAME_RELATED_P (insn) = 1;
12099 m->fs.cfa_reg = sa;
12100 m->fs.cfa_offset = UNITS_PER_WORD;
12101 m->fs.fp_valid = false;
12103 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12104 const0_rtx, style, false);
12106 else
12108 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12109 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12110 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12111 ix86_add_queued_cfa_restore_notes (insn);
12113 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12114 if (m->fs.cfa_offset != UNITS_PER_WORD)
12116 m->fs.cfa_offset = UNITS_PER_WORD;
12117 add_reg_note (insn, REG_CFA_DEF_CFA,
12118 plus_constant (Pmode, stack_pointer_rtx,
12119 UNITS_PER_WORD));
12120 RTX_FRAME_RELATED_P (insn) = 1;
12123 m->fs.sp_offset = UNITS_PER_WORD;
12124 m->fs.sp_valid = true;
12127 else
12129 /* SEH requires that the function end with (1) a stack adjustment
12130 if necessary, (2) a sequence of pops, and (3) a return or
12131 jump instruction. Prevent insns from the function body from
12132 being scheduled into this sequence. */
12133 if (TARGET_SEH)
12135 /* Prevent a catch region from being adjacent to the standard
12136 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12137 several other flags that would be interesting to test are
12138 set up yet. */
12139 if (flag_non_call_exceptions)
12140 emit_insn (gen_nops (const1_rtx));
12141 else
12142 emit_insn (gen_blockage ());
12145 /* First step is to deallocate the stack frame so that we can
12146 pop the registers. Also do it on SEH target for very large
12147 frame as the emitted instructions aren't allowed by the ABI in
12148 epilogues. */
12149 if (!m->fs.sp_valid
12150 || (TARGET_SEH
12151 && (m->fs.sp_offset - frame.reg_save_offset
12152 >= SEH_MAX_FRAME_SIZE)))
12154 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12155 GEN_INT (m->fs.fp_offset
12156 - frame.reg_save_offset),
12157 style, false);
12159 else if (m->fs.sp_offset != frame.reg_save_offset)
12161 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12162 GEN_INT (m->fs.sp_offset
12163 - frame.reg_save_offset),
12164 style,
12165 m->fs.cfa_reg == stack_pointer_rtx);
12168 ix86_emit_restore_regs_using_pop ();
12171 /* If we used a frame pointer and haven't already got rid of it,
12172 then do so now. */
12173 if (m->fs.fp_valid)
12175 /* If the stack pointer is valid and pointing at the frame
12176 pointer store address, then we only need a pop. */
12177 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12178 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12179 /* Leave results in shorter dependency chains on CPUs that are
12180 able to grok it fast. */
12181 else if (TARGET_USE_LEAVE
12182 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12183 || !cfun->machine->use_fast_prologue_epilogue)
12184 ix86_emit_leave ();
12185 else
12187 pro_epilogue_adjust_stack (stack_pointer_rtx,
12188 hard_frame_pointer_rtx,
12189 const0_rtx, style, !using_drap);
12190 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12194 if (using_drap)
12196 int param_ptr_offset = UNITS_PER_WORD;
12197 rtx insn;
12199 gcc_assert (stack_realign_drap);
12201 if (ix86_static_chain_on_stack)
12202 param_ptr_offset += UNITS_PER_WORD;
12203 if (!call_used_regs[REGNO (crtl->drap_reg)])
12204 param_ptr_offset += UNITS_PER_WORD;
12206 insn = emit_insn (gen_rtx_SET
12207 (VOIDmode, stack_pointer_rtx,
12208 gen_rtx_PLUS (Pmode,
12209 crtl->drap_reg,
12210 GEN_INT (-param_ptr_offset))));
12211 m->fs.cfa_reg = stack_pointer_rtx;
12212 m->fs.cfa_offset = param_ptr_offset;
12213 m->fs.sp_offset = param_ptr_offset;
12214 m->fs.realigned = false;
12216 add_reg_note (insn, REG_CFA_DEF_CFA,
12217 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12218 GEN_INT (param_ptr_offset)));
12219 RTX_FRAME_RELATED_P (insn) = 1;
12221 if (!call_used_regs[REGNO (crtl->drap_reg)])
12222 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12225 /* At this point the stack pointer must be valid, and we must have
12226 restored all of the registers. We may not have deallocated the
12227 entire stack frame. We've delayed this until now because it may
12228 be possible to merge the local stack deallocation with the
12229 deallocation forced by ix86_static_chain_on_stack. */
12230 gcc_assert (m->fs.sp_valid);
12231 gcc_assert (!m->fs.fp_valid);
12232 gcc_assert (!m->fs.realigned);
12233 if (m->fs.sp_offset != UNITS_PER_WORD)
12235 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12236 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12237 style, true);
12239 else
12240 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12242 /* Sibcall epilogues don't want a return instruction. */
12243 if (style == 0)
12245 m->fs = frame_state_save;
12246 return;
12249 if (crtl->args.pops_args && crtl->args.size)
12251 rtx popc = GEN_INT (crtl->args.pops_args);
12253 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12254 address, do an explicit add, and jump indirectly to the caller. */
12256 if (crtl->args.pops_args >= 65536)
12258 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12259 rtx insn;
12261 /* There is no "pascal" calling convention in any 64bit ABI. */
12262 gcc_assert (!TARGET_64BIT);
12264 insn = emit_insn (gen_pop (ecx));
12265 m->fs.cfa_offset -= UNITS_PER_WORD;
12266 m->fs.sp_offset -= UNITS_PER_WORD;
12268 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12269 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12270 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12271 add_reg_note (insn, REG_CFA_REGISTER,
12272 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12273 RTX_FRAME_RELATED_P (insn) = 1;
12275 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12276 popc, -1, true);
12277 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12279 else
12280 emit_jump_insn (gen_simple_return_pop_internal (popc));
12282 else
12283 emit_jump_insn (gen_simple_return_internal ());
12285 /* Restore the state to the state left by the prologue,
12286 so that it's correct for the next epilogue. */
12287 m->fs = frame_state_save;
12290 /* Reset from the function's potential modifications. */
12292 static void
12293 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12295 if (pic_offset_table_rtx
12296 && !ix86_use_pseudo_pic_reg ())
12297 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12298 #if TARGET_MACHO
12299 /* Mach-O doesn't support labels at the end of objects, so if
12300 it looks like we might want one, insert a NOP. */
12302 rtx_insn *insn = get_last_insn ();
12303 rtx_insn *deleted_debug_label = NULL;
12304 while (insn
12305 && NOTE_P (insn)
12306 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12308 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12309 notes; instead set their CODE_LABEL_NUMBER to -1, since
12310 otherwise there would be code generation differences
12311 between -g and -g0. */
12312 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12313 deleted_debug_label = insn;
12314 insn = PREV_INSN (insn);
12316 if (insn
12317 && (LABEL_P (insn)
12318 || (NOTE_P (insn)
12319 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12320 fputs ("\tnop\n", file);
12321 else if (deleted_debug_label)
12322 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12323 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12324 CODE_LABEL_NUMBER (insn) = -1;
12326 #endif
12330 /* Return a scratch register to use in the split stack prologue. The
12331 split stack prologue is used for -fsplit-stack. It consists of the first
12332 instructions in the function, emitted even before the regular prologue.
12333 The scratch register can be any caller-saved register which is not
12334 used for parameters or for the static chain. */
12336 static unsigned int
12337 split_stack_prologue_scratch_regno (void)
12339 if (TARGET_64BIT)
12340 return R11_REG;
12341 else
12343 bool is_fastcall, is_thiscall;
12344 int regparm;
12346 is_fastcall = (lookup_attribute ("fastcall",
12347 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12348 != NULL);
12349 is_thiscall = (lookup_attribute ("thiscall",
12350 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12351 != NULL);
12352 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12354 if (is_fastcall)
12356 if (DECL_STATIC_CHAIN (cfun->decl))
12358 sorry ("-fsplit-stack does not support fastcall with "
12359 "nested function");
12360 return INVALID_REGNUM;
12362 return AX_REG;
12364 else if (is_thiscall)
12366 if (!DECL_STATIC_CHAIN (cfun->decl))
12367 return DX_REG;
12368 return AX_REG;
12370 else if (regparm < 3)
12372 if (!DECL_STATIC_CHAIN (cfun->decl))
12373 return CX_REG;
12374 else
12376 if (regparm >= 2)
12378 sorry ("-fsplit-stack does not support 2 register "
12379 "parameters for a nested function");
12380 return INVALID_REGNUM;
12382 return DX_REG;
12385 else
12387 /* FIXME: We could make this work by pushing a register
12388 around the addition and comparison. */
12389 sorry ("-fsplit-stack does not support 3 register parameters");
12390 return INVALID_REGNUM;
12395 /* A SYMBOL_REF for the function which allocates new stack space for
12396 -fsplit-stack. */
12398 static GTY(()) rtx split_stack_fn;
12400 /* A SYMBOL_REF for the variant of the more-stack function used with
12401 the large model. */
12403 static GTY(()) rtx split_stack_fn_large;
12405 /* Handle -fsplit-stack. These are the first instructions in the
12406 function, even before the regular prologue. */
12408 void
12409 ix86_expand_split_stack_prologue (void)
12411 struct ix86_frame frame;
12412 HOST_WIDE_INT allocate;
12413 unsigned HOST_WIDE_INT args_size;
12414 rtx_code_label *label;
12415 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12416 rtx scratch_reg = NULL_RTX;
12417 rtx_code_label *varargs_label = NULL;
12418 rtx fn;
12420 gcc_assert (flag_split_stack && reload_completed);
12422 ix86_finalize_stack_realign_flags ();
12423 ix86_compute_frame_layout (&frame);
12424 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12426 /* This is the label we will branch to if we have enough stack
12427 space. We expect the basic block reordering pass to reverse this
12428 branch if optimizing, so that we branch in the unlikely case. */
12429 label = gen_label_rtx ();
12431 /* We need to compare the stack pointer minus the frame size with
12432 the stack boundary in the TCB. The stack boundary always gives
12433 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12434 can compare directly. Otherwise we need to do an addition. */
12436 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12437 UNSPEC_STACK_CHECK);
12438 limit = gen_rtx_CONST (Pmode, limit);
12439 limit = gen_rtx_MEM (Pmode, limit);
12440 if (allocate < SPLIT_STACK_AVAILABLE)
12441 current = stack_pointer_rtx;
12442 else
12444 unsigned int scratch_regno;
12445 rtx offset;
12447 /* We need a scratch register to hold the stack pointer minus
12448 the required frame size. Since this is the very start of the
12449 function, the scratch register can be any caller-saved
12450 register which is not used for parameters. */
12451 offset = GEN_INT (- allocate);
12452 scratch_regno = split_stack_prologue_scratch_regno ();
12453 if (scratch_regno == INVALID_REGNUM)
12454 return;
12455 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12456 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12458 /* We don't use ix86_gen_add3 in this case because it will
12459 want to split to lea, but when not optimizing the insn
12460 will not be split after this point. */
12461 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12462 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12463 offset)));
12465 else
12467 emit_move_insn (scratch_reg, offset);
12468 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12469 stack_pointer_rtx));
12471 current = scratch_reg;
12474 ix86_expand_branch (GEU, current, limit, label);
12475 jump_insn = get_last_insn ();
12476 JUMP_LABEL (jump_insn) = label;
12478 /* Mark the jump as very likely to be taken. */
12479 add_int_reg_note (jump_insn, REG_BR_PROB,
12480 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12482 if (split_stack_fn == NULL_RTX)
12484 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12485 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12487 fn = split_stack_fn;
12489 /* Get more stack space. We pass in the desired stack space and the
12490 size of the arguments to copy to the new stack. In 32-bit mode
12491 we push the parameters; __morestack will return on a new stack
12492 anyhow. In 64-bit mode we pass the parameters in r10 and
12493 r11. */
12494 allocate_rtx = GEN_INT (allocate);
12495 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12496 call_fusage = NULL_RTX;
12497 if (TARGET_64BIT)
12499 rtx reg10, reg11;
12501 reg10 = gen_rtx_REG (Pmode, R10_REG);
12502 reg11 = gen_rtx_REG (Pmode, R11_REG);
12504 /* If this function uses a static chain, it will be in %r10.
12505 Preserve it across the call to __morestack. */
12506 if (DECL_STATIC_CHAIN (cfun->decl))
12508 rtx rax;
12510 rax = gen_rtx_REG (word_mode, AX_REG);
12511 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12512 use_reg (&call_fusage, rax);
12515 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12516 && !TARGET_PECOFF)
12518 HOST_WIDE_INT argval;
12520 gcc_assert (Pmode == DImode);
12521 /* When using the large model we need to load the address
12522 into a register, and we've run out of registers. So we
12523 switch to a different calling convention, and we call a
12524 different function: __morestack_large_model. We pass the
12525 argument size in the upper 32 bits of r10 and pass the
12526 frame size in the lower 32 bits. */
12527 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12528 gcc_assert ((args_size & 0xffffffff) == args_size);
12530 if (split_stack_fn_large == NULL_RTX)
12532 split_stack_fn_large =
12533 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12534 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12536 if (ix86_cmodel == CM_LARGE_PIC)
12538 rtx_code_label *label;
12539 rtx x;
12541 label = gen_label_rtx ();
12542 emit_label (label);
12543 LABEL_PRESERVE_P (label) = 1;
12544 emit_insn (gen_set_rip_rex64 (reg10, label));
12545 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12546 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12547 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12548 UNSPEC_GOT);
12549 x = gen_rtx_CONST (Pmode, x);
12550 emit_move_insn (reg11, x);
12551 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12552 x = gen_const_mem (Pmode, x);
12553 emit_move_insn (reg11, x);
12555 else
12556 emit_move_insn (reg11, split_stack_fn_large);
12558 fn = reg11;
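/* As an illustrative sketch with hypothetical values (not taken from any
   real build): if allocate == 0x1000 and args_size == 0x20, the packed
   value computed below is
     argval = ((0x20 << 16) << 16) + 0x1000 == 0x0000002000001000,
   i.e. the argument size lands in bits 32..63 of %r10 and the frame size
   in bits 0..31, as described in the comment above.  */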
12560 argval = ((args_size << 16) << 16) + allocate;
12561 emit_move_insn (reg10, GEN_INT (argval));
12563 else
12565 emit_move_insn (reg10, allocate_rtx);
12566 emit_move_insn (reg11, GEN_INT (args_size));
12567 use_reg (&call_fusage, reg11);
12570 use_reg (&call_fusage, reg10);
12572 else
12574 emit_insn (gen_push (GEN_INT (args_size)));
12575 emit_insn (gen_push (allocate_rtx));
12577 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12578 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12579 NULL_RTX, false);
12580 add_function_usage_to (call_insn, call_fusage);
12582 /* In order to make call/return prediction work right, we now need
12583 to execute a return instruction. See
12584 libgcc/config/i386/morestack.S for the details on how this works.
12586 For flow purposes gcc must not see this as a return
12587 instruction--we need control flow to continue at the subsequent
12588 label. Therefore, we use an unspec. */
12589 gcc_assert (crtl->args.pops_args < 65536);
12590 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12592 /* If we are in 64-bit mode and this function uses a static chain,
12593 we saved %r10 in %rax before calling __morestack. */
12594 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12595 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12596 gen_rtx_REG (word_mode, AX_REG));
12598 /* If this function calls va_start, we need to store a pointer to
12599 the arguments on the old stack, because they may not have been
12600 all copied to the new stack. At this point the old stack can be
12601 found at the frame pointer value used by __morestack, because
12602 __morestack has set that up before calling back to us. Here we
12603 store that pointer in a scratch register, and in
12604 ix86_expand_prologue we store the scratch register in a stack
12605 slot. */
12606 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12608 unsigned int scratch_regno;
12609 rtx frame_reg;
12610 int words;
12612 scratch_regno = split_stack_prologue_scratch_regno ();
12613 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12614 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12616 /* 64-bit:
12617 fp -> old fp value
12618 return address within this function
12619 return address of caller of this function
12620 stack arguments
12621 So we add three words to get to the stack arguments.
12623 32-bit:
12624 fp -> old fp value
12625 return address within this function
12626 first argument to __morestack
12627 second argument to __morestack
12628 return address of caller of this function
12629 stack arguments
12630 So we add five words to get to the stack arguments.
12632 words = TARGET_64BIT ? 3 : 5;
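/* As a concrete illustration (hypothetical values): on a 64-bit target
   with UNITS_PER_WORD == 8 the insn below computes scratch_reg = fp + 24,
   just past the three saved words listed above; on a 32-bit target with
   UNITS_PER_WORD == 4 it computes fp + 20, past the five saved words.  */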
12633 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12634 gen_rtx_PLUS (Pmode, frame_reg,
12635 GEN_INT (words * UNITS_PER_WORD))));
12637 varargs_label = gen_label_rtx ();
12638 emit_jump_insn (gen_jump (varargs_label));
12639 JUMP_LABEL (get_last_insn ()) = varargs_label;
12641 emit_barrier ();
12644 emit_label (label);
12645 LABEL_NUSES (label) = 1;
12647 /* If this function calls va_start, we now have to set the scratch
12648 register for the case where we do not call __morestack. In this
12649 case we need to set it based on the stack pointer. */
12650 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12652 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12653 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12654 GEN_INT (UNITS_PER_WORD))));
12656 emit_label (varargs_label);
12657 LABEL_NUSES (varargs_label) = 1;
12661 /* We may have to tell the dataflow pass that the split stack prologue
12662 is initializing a scratch register. */
12664 static void
12665 ix86_live_on_entry (bitmap regs)
12667 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12669 gcc_assert (flag_split_stack);
12670 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12674 /* Extract the parts of an RTL expression that is a valid memory address
12675 for an instruction. Return 0 if the structure of the address is
12676 grossly off. Return -1 if the address contains an ASHIFT, so it is not
12677 strictly valid, but is still used for computing the length of a lea instruction. */
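/* A rough sketch of what this accepts (illustrative, not exhaustive):
     (plus:SI (reg:SI %ebx) (mult:SI (reg:SI %ecx) (const_int 4)))
   decomposes into base = %ebx, index = %ecx, scale = 4 and no
   displacement, while a bare (symbol_ref:SI ("foo")) becomes just a
   displacement.  An (ashift ... (const_int 2)) index is accepted with a
   return value of -1: not a strictly valid address, but still usable for
   computing the length of a lea.  */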
12680 ix86_decompose_address (rtx addr, struct ix86_address *out)
12682 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12683 rtx base_reg, index_reg;
12684 HOST_WIDE_INT scale = 1;
12685 rtx scale_rtx = NULL_RTX;
12686 rtx tmp;
12687 int retval = 1;
12688 enum ix86_address_seg seg = SEG_DEFAULT;
12690 /* Allow zero-extended SImode addresses;
12691 they will be emitted with the addr32 prefix. */
12692 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12694 if (GET_CODE (addr) == ZERO_EXTEND
12695 && GET_MODE (XEXP (addr, 0)) == SImode)
12697 addr = XEXP (addr, 0);
12698 if (CONST_INT_P (addr))
12699 return 0;
12701 else if (GET_CODE (addr) == AND
12702 && const_32bit_mask (XEXP (addr, 1), DImode))
12704 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12705 if (addr == NULL_RTX)
12706 return 0;
12708 if (CONST_INT_P (addr))
12709 return 0;
12713 /* Allow SImode subregs of DImode addresses;
12714 they will be emitted with the addr32 prefix. */
12715 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12717 if (GET_CODE (addr) == SUBREG
12718 && GET_MODE (SUBREG_REG (addr)) == DImode)
12720 addr = SUBREG_REG (addr);
12721 if (CONST_INT_P (addr))
12722 return 0;
12726 if (REG_P (addr))
12727 base = addr;
12728 else if (GET_CODE (addr) == SUBREG)
12730 if (REG_P (SUBREG_REG (addr)))
12731 base = addr;
12732 else
12733 return 0;
12735 else if (GET_CODE (addr) == PLUS)
12737 rtx addends[4], op;
12738 int n = 0, i;
12740 op = addr;
12743 if (n >= 4)
12744 return 0;
12745 addends[n++] = XEXP (op, 1);
12746 op = XEXP (op, 0);
12748 while (GET_CODE (op) == PLUS);
12749 if (n >= 4)
12750 return 0;
12751 addends[n] = op;
12753 for (i = n; i >= 0; --i)
12755 op = addends[i];
12756 switch (GET_CODE (op))
12758 case MULT:
12759 if (index)
12760 return 0;
12761 index = XEXP (op, 0);
12762 scale_rtx = XEXP (op, 1);
12763 break;
12765 case ASHIFT:
12766 if (index)
12767 return 0;
12768 index = XEXP (op, 0);
12769 tmp = XEXP (op, 1);
12770 if (!CONST_INT_P (tmp))
12771 return 0;
12772 scale = INTVAL (tmp);
12773 if ((unsigned HOST_WIDE_INT) scale > 3)
12774 return 0;
12775 scale = 1 << scale;
12776 break;
12778 case ZERO_EXTEND:
12779 op = XEXP (op, 0);
12780 if (GET_CODE (op) != UNSPEC)
12781 return 0;
12782 /* FALLTHRU */
12784 case UNSPEC:
12785 if (XINT (op, 1) == UNSPEC_TP
12786 && TARGET_TLS_DIRECT_SEG_REFS
12787 && seg == SEG_DEFAULT)
12788 seg = DEFAULT_TLS_SEG_REG;
12789 else
12790 return 0;
12791 break;
12793 case SUBREG:
12794 if (!REG_P (SUBREG_REG (op)))
12795 return 0;
12796 /* FALLTHRU */
12798 case REG:
12799 if (!base)
12800 base = op;
12801 else if (!index)
12802 index = op;
12803 else
12804 return 0;
12805 break;
12807 case CONST:
12808 case CONST_INT:
12809 case SYMBOL_REF:
12810 case LABEL_REF:
12811 if (disp)
12812 return 0;
12813 disp = op;
12814 break;
12816 default:
12817 return 0;
12821 else if (GET_CODE (addr) == MULT)
12823 index = XEXP (addr, 0); /* index*scale */
12824 scale_rtx = XEXP (addr, 1);
12826 else if (GET_CODE (addr) == ASHIFT)
12828 /* We're called for lea too, which implements ashift on occasion. */
12829 index = XEXP (addr, 0);
12830 tmp = XEXP (addr, 1);
12831 if (!CONST_INT_P (tmp))
12832 return 0;
12833 scale = INTVAL (tmp);
12834 if ((unsigned HOST_WIDE_INT) scale > 3)
12835 return 0;
12836 scale = 1 << scale;
12837 retval = -1;
12839 else
12840 disp = addr; /* displacement */
12842 if (index)
12844 if (REG_P (index))
12846 else if (GET_CODE (index) == SUBREG
12847 && REG_P (SUBREG_REG (index)))
12849 else
12850 return 0;
12853 /* Extract the integral value of scale. */
12854 if (scale_rtx)
12856 if (!CONST_INT_P (scale_rtx))
12857 return 0;
12858 scale = INTVAL (scale_rtx);
12861 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12862 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12864 /* Avoid useless 0 displacement. */
12865 if (disp == const0_rtx && (base || index))
12866 disp = NULL_RTX;
12868 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12869 if (base_reg && index_reg && scale == 1
12870 && (index_reg == arg_pointer_rtx
12871 || index_reg == frame_pointer_rtx
12872 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12874 std::swap (base, index);
12875 std::swap (base_reg, index_reg);
12878 /* Special case: %ebp cannot be encoded as a base without a displacement.
12879 Similarly %r13. */
12880 if (!disp
12881 && base_reg
12882 && (base_reg == hard_frame_pointer_rtx
12883 || base_reg == frame_pointer_rtx
12884 || base_reg == arg_pointer_rtx
12885 || (REG_P (base_reg)
12886 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12887 || REGNO (base_reg) == R13_REG))))
12888 disp = const0_rtx;
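/* For example, a bare [%ebp] (or [%r13]) would require the ModR/M
   encoding that is reserved for a plain disp32, so assemblers emit it as
   [%ebp+0] with a zero displacement byte instead; forcing const0_rtx here
   makes that explicit.  */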
12890 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12891 Avoid this by transforming to [%esi+0].
12892 Reload calls address legitimization without cfun defined, so we need
12893 to test cfun for being non-NULL. */
12894 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12895 && base_reg && !index_reg && !disp
12896 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12897 disp = const0_rtx;
12899 /* Special case: encode reg+reg instead of reg*2. */
12900 if (!base && index && scale == 2)
12901 base = index, base_reg = index_reg, scale = 1;
12903 /* Special case: scaling cannot be encoded without base or displacement. */
12904 if (!base && !disp && index && scale != 1)
12905 disp = const0_rtx;
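/* E.g. a lone index*4 has no encoding of its own; it has to come out as
   0(,%reg,4), so force a zero displacement.  */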
12907 out->base = base;
12908 out->index = index;
12909 out->disp = disp;
12910 out->scale = scale;
12911 out->seg = seg;
12913 return retval;
12916 /* Return cost of the memory address x.
12917 For i386, it is better to use a complex address than let gcc copy
12918 the address into a reg and make a new pseudo. But not if the address
12919 requires two regs - that would mean more pseudos with longer
12920 lifetimes. */
12921 static int
12922 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12924 struct ix86_address parts;
12925 int cost = 1;
12926 int ok = ix86_decompose_address (x, &parts);
12928 gcc_assert (ok);
12930 if (parts.base && GET_CODE (parts.base) == SUBREG)
12931 parts.base = SUBREG_REG (parts.base);
12932 if (parts.index && GET_CODE (parts.index) == SUBREG)
12933 parts.index = SUBREG_REG (parts.index);
12935 /* Attempt to minimize the number of registers in the address by increasing
12936 the address cost for each register used. We don't increase the address
12937 cost for "pic_offset_table_rtx". When a memory op using
12938 "pic_offset_table_rtx" is not invariant itself, it most likely means that
12939 the base or index is not invariant. Therefore only "pic_offset_table_rtx"
12940 could be hoisted out, which is not profitable for x86. */
12941 if (parts.base
12942 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12943 && (current_pass->type == GIMPLE_PASS
12944 || !pic_offset_table_rtx
12945 || !REG_P (parts.base)
12946 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
12947 cost++;
12949 if (parts.index
12950 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12951 && (current_pass->type == GIMPLE_PASS
12952 || !pic_offset_table_rtx
12953 || !REG_P (parts.index)
12954 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
12955 cost++;
12957 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
12958 since its predecode logic can't detect the length of such instructions
12959 and decoding degenerates to the vector decoder. Increase the cost of such
12960 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12961 to split such addresses, or even to refuse them entirely.
12963 The following addressing modes are affected:
12964 [base+scale*index]
12965 [scale*index+disp]
12966 [base+index]
12968 The first and last cases may be avoidable by explicitly coding the zero into
12969 the memory address, but I don't have an AMD K6 machine handy to check this
12970 theory. */
12972 if (TARGET_K6
12973 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12974 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12975 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12976 cost += 10;
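/* A hypothetical example of an affected form: (%eax,%ecx,2) is encoded
   with ModR/M 00_xxx_100b, whereas writing it with an explicit zero
   displacement, 0(%eax,%ecx,2), would use mod 01 and, per the theory
   above, avoid the decoder penalty.  */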
12978 return cost;
12981 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12982 this is used to form addresses to local data when -fPIC is in
12983 use. */
12985 static bool
12986 darwin_local_data_pic (rtx disp)
12988 return (GET_CODE (disp) == UNSPEC
12989 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12992 /* Determine if a given RTX is a valid constant. We already know this
12993 satisfies CONSTANT_P. */
12995 static bool
12996 ix86_legitimate_constant_p (machine_mode, rtx x)
12998 /* Pointer bounds constants are not valid. */
12999 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13000 return false;
13002 switch (GET_CODE (x))
13004 case CONST:
13005 x = XEXP (x, 0);
13007 if (GET_CODE (x) == PLUS)
13009 if (!CONST_INT_P (XEXP (x, 1)))
13010 return false;
13011 x = XEXP (x, 0);
13014 if (TARGET_MACHO && darwin_local_data_pic (x))
13015 return true;
13017 /* Only some unspecs are valid as "constants". */
13018 if (GET_CODE (x) == UNSPEC)
13019 switch (XINT (x, 1))
13021 case UNSPEC_GOT:
13022 case UNSPEC_GOTOFF:
13023 case UNSPEC_PLTOFF:
13024 return TARGET_64BIT;
13025 case UNSPEC_TPOFF:
13026 case UNSPEC_NTPOFF:
13027 x = XVECEXP (x, 0, 0);
13028 return (GET_CODE (x) == SYMBOL_REF
13029 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13030 case UNSPEC_DTPOFF:
13031 x = XVECEXP (x, 0, 0);
13032 return (GET_CODE (x) == SYMBOL_REF
13033 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13034 default:
13035 return false;
13038 /* We must have drilled down to a symbol. */
13039 if (GET_CODE (x) == LABEL_REF)
13040 return true;
13041 if (GET_CODE (x) != SYMBOL_REF)
13042 return false;
13043 /* FALLTHRU */
13045 case SYMBOL_REF:
13046 /* TLS symbols are never valid. */
13047 if (SYMBOL_REF_TLS_MODEL (x))
13048 return false;
13050 /* DLLIMPORT symbols are never valid. */
13051 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13052 && SYMBOL_REF_DLLIMPORT_P (x))
13053 return false;
13055 #if TARGET_MACHO
13056 /* mdynamic-no-pic */
13057 if (MACHO_DYNAMIC_NO_PIC_P)
13058 return machopic_symbol_defined_p (x);
13059 #endif
13060 break;
13062 case CONST_DOUBLE:
13063 if (GET_MODE (x) == TImode
13064 && x != CONST0_RTX (TImode)
13065 && !TARGET_64BIT)
13066 return false;
13067 break;
13069 case CONST_VECTOR:
13070 if (!standard_sse_constant_p (x))
13071 return false;
13073 default:
13074 break;
13077 /* Otherwise we handle everything else in the move patterns. */
13078 return true;
13081 /* Determine if it's legal to put X into the constant pool. This
13082 is not possible for the address of thread-local symbols, which
13083 is checked above. */
13085 static bool
13086 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13088 /* We can always put integral constants and vectors in memory. */
13089 switch (GET_CODE (x))
13091 case CONST_INT:
13092 case CONST_DOUBLE:
13093 case CONST_VECTOR:
13094 return false;
13096 default:
13097 break;
13099 return !ix86_legitimate_constant_p (mode, x);
13102 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
13103 otherwise zero. */
13105 static bool
13106 is_imported_p (rtx x)
13108 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13109 || GET_CODE (x) != SYMBOL_REF)
13110 return false;
13112 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13116 /* Nonzero if the constant value X is a legitimate general operand
13117 when generating PIC code. It is given that flag_pic is on and
13118 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
13120 bool
13121 legitimate_pic_operand_p (rtx x)
13123 rtx inner;
13125 switch (GET_CODE (x))
13127 case CONST:
13128 inner = XEXP (x, 0);
13129 if (GET_CODE (inner) == PLUS
13130 && CONST_INT_P (XEXP (inner, 1)))
13131 inner = XEXP (inner, 0);
13133 /* Only some unspecs are valid as "constants". */
13134 if (GET_CODE (inner) == UNSPEC)
13135 switch (XINT (inner, 1))
13137 case UNSPEC_GOT:
13138 case UNSPEC_GOTOFF:
13139 case UNSPEC_PLTOFF:
13140 return TARGET_64BIT;
13141 case UNSPEC_TPOFF:
13142 x = XVECEXP (inner, 0, 0);
13143 return (GET_CODE (x) == SYMBOL_REF
13144 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13145 case UNSPEC_MACHOPIC_OFFSET:
13146 return legitimate_pic_address_disp_p (x);
13147 default:
13148 return false;
13150 /* FALLTHRU */
13152 case SYMBOL_REF:
13153 case LABEL_REF:
13154 return legitimate_pic_address_disp_p (x);
13156 default:
13157 return true;
13161 /* Determine if a given CONST RTX is a valid memory displacement
13162 in PIC mode. */
13164 bool
13165 legitimate_pic_address_disp_p (rtx disp)
13167 bool saw_plus;
13169 /* In 64bit mode we can allow direct addresses of symbols and labels
13170 when they are not dynamic symbols. */
13171 if (TARGET_64BIT)
13173 rtx op0 = disp, op1;
13175 switch (GET_CODE (disp))
13177 case LABEL_REF:
13178 return true;
13180 case CONST:
13181 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13182 break;
13183 op0 = XEXP (XEXP (disp, 0), 0);
13184 op1 = XEXP (XEXP (disp, 0), 1);
13185 if (!CONST_INT_P (op1)
13186 || INTVAL (op1) >= 16*1024*1024
13187 || INTVAL (op1) < -16*1024*1024)
13188 break;
13189 if (GET_CODE (op0) == LABEL_REF)
13190 return true;
13191 if (GET_CODE (op0) == CONST
13192 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13193 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13194 return true;
13195 if (GET_CODE (op0) == UNSPEC
13196 && XINT (op0, 1) == UNSPEC_PCREL)
13197 return true;
13198 if (GET_CODE (op0) != SYMBOL_REF)
13199 break;
13200 /* FALLTHRU */
13202 case SYMBOL_REF:
13203 /* TLS references should always be enclosed in UNSPEC.
13204 A dllimported symbol always needs to be resolved. */
13205 if (SYMBOL_REF_TLS_MODEL (op0)
13206 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13207 return false;
13209 if (TARGET_PECOFF)
13211 if (is_imported_p (op0))
13212 return true;
13214 if (SYMBOL_REF_FAR_ADDR_P (op0)
13215 || !SYMBOL_REF_LOCAL_P (op0))
13216 break;
13218 /* Function symbols need to be resolved only for
13219 the large model.
13220 For the small model we don't need to resolve anything
13221 here. */
13222 if ((ix86_cmodel != CM_LARGE_PIC
13223 && SYMBOL_REF_FUNCTION_P (op0))
13224 || ix86_cmodel == CM_SMALL_PIC)
13225 return true;
13226 /* Non-external symbols don't need to be resolved for
13227 the large and medium models. */
13228 if ((ix86_cmodel == CM_LARGE_PIC
13229 || ix86_cmodel == CM_MEDIUM_PIC)
13230 && !SYMBOL_REF_EXTERNAL_P (op0))
13231 return true;
13233 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13234 && (SYMBOL_REF_LOCAL_P (op0)
13235 || (HAVE_LD_PIE_COPYRELOC
13236 && flag_pie
13237 && !SYMBOL_REF_WEAK (op0)
13238 && !SYMBOL_REF_FUNCTION_P (op0)))
13239 && ix86_cmodel != CM_LARGE_PIC)
13240 return true;
13241 break;
13243 default:
13244 break;
13247 if (GET_CODE (disp) != CONST)
13248 return false;
13249 disp = XEXP (disp, 0);
13251 if (TARGET_64BIT)
13253 /* It is unsafe to allow PLUS expressions; this would limit the allowed
13254 distance of GOT tables. We should not need these anyway. */
13255 if (GET_CODE (disp) != UNSPEC
13256 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13257 && XINT (disp, 1) != UNSPEC_GOTOFF
13258 && XINT (disp, 1) != UNSPEC_PCREL
13259 && XINT (disp, 1) != UNSPEC_PLTOFF))
13260 return false;
13262 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13263 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13264 return false;
13265 return true;
13268 saw_plus = false;
13269 if (GET_CODE (disp) == PLUS)
13271 if (!CONST_INT_P (XEXP (disp, 1)))
13272 return false;
13273 disp = XEXP (disp, 0);
13274 saw_plus = true;
13277 if (TARGET_MACHO && darwin_local_data_pic (disp))
13278 return true;
13280 if (GET_CODE (disp) != UNSPEC)
13281 return false;
13283 switch (XINT (disp, 1))
13285 case UNSPEC_GOT:
13286 if (saw_plus)
13287 return false;
13288 /* We need to check for both symbols and labels because VxWorks loads
13289 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13290 details. */
13291 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13292 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13293 case UNSPEC_GOTOFF:
13294 /* Refuse GOTOFF in 64-bit mode since it is always 64-bit when used.
13295 While the ABI also specifies a 32-bit relocation, we don't produce it
13296 in the small PIC model at all. */
13297 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13298 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13299 && !TARGET_64BIT)
13300 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13301 return false;
13302 case UNSPEC_GOTTPOFF:
13303 case UNSPEC_GOTNTPOFF:
13304 case UNSPEC_INDNTPOFF:
13305 if (saw_plus)
13306 return false;
13307 disp = XVECEXP (disp, 0, 0);
13308 return (GET_CODE (disp) == SYMBOL_REF
13309 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13310 case UNSPEC_NTPOFF:
13311 disp = XVECEXP (disp, 0, 0);
13312 return (GET_CODE (disp) == SYMBOL_REF
13313 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13314 case UNSPEC_DTPOFF:
13315 disp = XVECEXP (disp, 0, 0);
13316 return (GET_CODE (disp) == SYMBOL_REF
13317 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13320 return false;
13323 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13324 replace the input X, or the original X if no replacement is called for.
13325 The output parameter *WIN is 1 if the calling macro should goto WIN,
13326 0 if it should not. */
13328 bool
13329 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13330 int)
13332 /* Reload can generate:
13334 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13335 (reg:DI 97))
13336 (reg:DI 2 cx))
13338 This RTX is rejected from ix86_legitimate_address_p due to
13339 non-strictness of base register 97. Following this rejection,
13340 reload pushes all three components into separate registers,
13341 creating an invalid memory address RTX.
13343 The following code reloads only the invalid part of the
13344 memory address RTX. */
13346 if (GET_CODE (x) == PLUS
13347 && REG_P (XEXP (x, 1))
13348 && GET_CODE (XEXP (x, 0)) == PLUS
13349 && REG_P (XEXP (XEXP (x, 0), 1)))
13351 rtx base, index;
13352 bool something_reloaded = false;
13354 base = XEXP (XEXP (x, 0), 1);
13355 if (!REG_OK_FOR_BASE_STRICT_P (base))
13357 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13358 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13359 opnum, (enum reload_type) type);
13360 something_reloaded = true;
13363 index = XEXP (x, 1);
13364 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13366 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13367 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13368 opnum, (enum reload_type) type);
13369 something_reloaded = true;
13372 gcc_assert (something_reloaded);
13373 return true;
13376 return false;
13379 /* Determine if OP is a suitable RTX for an address register.
13380 Return the naked register if a register or a register subreg is
13381 found, otherwise return NULL_RTX. */
13383 static rtx
13384 ix86_validate_address_register (rtx op)
13386 machine_mode mode = GET_MODE (op);
13388 /* Only SImode or DImode registers can form the address. */
13389 if (mode != SImode && mode != DImode)
13390 return NULL_RTX;
13392 if (REG_P (op))
13393 return op;
13394 else if (GET_CODE (op) == SUBREG)
13396 rtx reg = SUBREG_REG (op);
13398 if (!REG_P (reg))
13399 return NULL_RTX;
13401 mode = GET_MODE (reg);
13403 /* Don't allow SUBREGs that span more than a word. It can
13404 lead to spill failures when the register is one word out
13405 of a two word structure. */
13406 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13407 return NULL_RTX;
13409 /* Allow only SUBREGs of non-eliminable hard registers. */
13410 if (register_no_elim_operand (reg, mode))
13411 return reg;
13414 /* Op is not a register. */
13415 return NULL_RTX;
13418 /* Recognizes RTL expressions that are valid memory addresses for an
13419 instruction. The MODE argument is the machine mode for the MEM
13420 expression that wants to use this address.
13422 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13423 convert common non-canonical forms to canonical form so that they will
13424 be recognized. */
13426 static bool
13427 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13429 struct ix86_address parts;
13430 rtx base, index, disp;
13431 HOST_WIDE_INT scale;
13432 enum ix86_address_seg seg;
13434 if (ix86_decompose_address (addr, &parts) <= 0)
13435 /* Decomposition failed. */
13436 return false;
13438 base = parts.base;
13439 index = parts.index;
13440 disp = parts.disp;
13441 scale = parts.scale;
13442 seg = parts.seg;
13444 /* Validate base register. */
13445 if (base)
13447 rtx reg = ix86_validate_address_register (base);
13449 if (reg == NULL_RTX)
13450 return false;
13452 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13453 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13454 /* Base is not valid. */
13455 return false;
13458 /* Validate index register. */
13459 if (index)
13461 rtx reg = ix86_validate_address_register (index);
13463 if (reg == NULL_RTX)
13464 return false;
13466 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13467 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13468 /* Index is not valid. */
13469 return false;
13472 /* Index and base should have the same mode. */
13473 if (base && index
13474 && GET_MODE (base) != GET_MODE (index))
13475 return false;
13477 /* Address override works only on the (%reg) part of %fs:(%reg). */
13478 if (seg != SEG_DEFAULT
13479 && ((base && GET_MODE (base) != word_mode)
13480 || (index && GET_MODE (index) != word_mode)))
13481 return false;
13483 /* Validate scale factor. */
13484 if (scale != 1)
13486 if (!index)
13487 /* Scale without index. */
13488 return false;
13490 if (scale != 2 && scale != 4 && scale != 8)
13491 /* Scale is not a valid multiplier. */
13492 return false;
13495 /* Validate displacement. */
13496 if (disp)
13498 if (GET_CODE (disp) == CONST
13499 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13500 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13501 switch (XINT (XEXP (disp, 0), 1))
13503 /* Refuse GOTOFF and GOT in 64-bit mode since they are always 64-bit when
13504 used. While the ABI also specifies 32-bit relocations, we don't produce
13505 them at all and use IP-relative addressing instead. */
13506 case UNSPEC_GOT:
13507 case UNSPEC_GOTOFF:
13508 gcc_assert (flag_pic);
13509 if (!TARGET_64BIT)
13510 goto is_legitimate_pic;
13512 /* 64bit address unspec. */
13513 return false;
13515 case UNSPEC_GOTPCREL:
13516 case UNSPEC_PCREL:
13517 gcc_assert (flag_pic);
13518 goto is_legitimate_pic;
13520 case UNSPEC_GOTTPOFF:
13521 case UNSPEC_GOTNTPOFF:
13522 case UNSPEC_INDNTPOFF:
13523 case UNSPEC_NTPOFF:
13524 case UNSPEC_DTPOFF:
13525 break;
13527 case UNSPEC_STACK_CHECK:
13528 gcc_assert (flag_split_stack);
13529 break;
13531 default:
13532 /* Invalid address unspec. */
13533 return false;
13536 else if (SYMBOLIC_CONST (disp)
13537 && (flag_pic
13538 || (TARGET_MACHO
13539 #if TARGET_MACHO
13540 && MACHOPIC_INDIRECT
13541 && !machopic_operand_p (disp)
13542 #endif
13546 is_legitimate_pic:
13547 if (TARGET_64BIT && (index || base))
13549 /* foo@dtpoff(%rX) is ok. */
13550 if (GET_CODE (disp) != CONST
13551 || GET_CODE (XEXP (disp, 0)) != PLUS
13552 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13553 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13554 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13555 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13556 /* Non-constant pic memory reference. */
13557 return false;
13559 else if ((!TARGET_MACHO || flag_pic)
13560 && ! legitimate_pic_address_disp_p (disp))
13561 /* Displacement is an invalid pic construct. */
13562 return false;
13563 #if TARGET_MACHO
13564 else if (MACHO_DYNAMIC_NO_PIC_P
13565 && !ix86_legitimate_constant_p (Pmode, disp))
13566 /* displacement must be referenced via non_lazy_pointer */
13567 return false;
13568 #endif
13570 /* This code used to verify that a symbolic pic displacement
13571 includes the pic_offset_table_rtx register.
13573 While this is a good idea, unfortunately these constructs may
13574 be created by the "adds using lea" optimization for incorrect
13575 code like:
13577 int a;
13578 int foo(int i)
13580 return *(&a+i);
13583 This code is nonsensical, but results in addressing the
13584 GOT table with a pic_offset_table_rtx base. We can't
13585 just refuse it easily, since it gets matched by the
13586 "addsi3" pattern, which later gets split to lea when the
13587 output register differs from the input. While this
13588 could be handled by a separate addsi pattern for this case
13589 that never results in lea, disabling this test seems to be
13590 the easier and correct fix for the crash. */
13592 else if (GET_CODE (disp) != LABEL_REF
13593 && !CONST_INT_P (disp)
13594 && (GET_CODE (disp) != CONST
13595 || !ix86_legitimate_constant_p (Pmode, disp))
13596 && (GET_CODE (disp) != SYMBOL_REF
13597 || !ix86_legitimate_constant_p (Pmode, disp)))
13598 /* Displacement is not constant. */
13599 return false;
13600 else if (TARGET_64BIT
13601 && !x86_64_immediate_operand (disp, VOIDmode))
13602 /* Displacement is out of range. */
13603 return false;
13604 /* In x32 mode, constant addresses are sign extended to 64bit, so
13605 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13606 else if (TARGET_X32 && !(index || base)
13607 && CONST_INT_P (disp)
13608 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13609 return false;
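/* For instance, a hypothetical constant address 0x80000000 would be
   sign-extended to 0xffffffff80000000 when used as a 64-bit address,
   which lies outside the x32 address space, so such displacements are
   rejected above.  */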
13612 /* Everything looks valid. */
13613 return true;
13616 /* Determine if a given RTX is a valid constant address. */
13618 bool
13619 constant_address_p (rtx x)
13621 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13624 /* Return a unique alias set for the GOT. */
13626 static alias_set_type
13627 ix86_GOT_alias_set (void)
13629 static alias_set_type set = -1;
13630 if (set == -1)
13631 set = new_alias_set ();
13632 return set;
13635 /* Set regs_ever_live for PIC base address register
13636 to true if required. */
13637 static void
13638 set_pic_reg_ever_live ()
13640 if (reload_in_progress)
13641 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13644 /* Return a legitimate reference for ORIG (an address) using the
13645 register REG. If REG is 0, a new pseudo is generated.
13647 There are two types of references that must be handled:
13649 1. Global data references must load the address from the GOT, via
13650 the PIC reg. An insn is emitted to do this load, and the reg is
13651 returned.
13653 2. Static data references, constant pool addresses, and code labels
13654 compute the address as an offset from the GOT, whose base is in
13655 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13656 differentiate them from global data objects. The returned
13657 address is the PIC reg + an unspec constant.
13659 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13660 reg also appears in the address. */
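/* A rough sketch of the two cases (32-bit forms shown; the 64-bit paths
   below use GOTPCREL or direct addresses instead): a global symbol ends
   up as a load of the form
     (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
   while a local symbol or label becomes an address of the form
     (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))).  */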
13662 static rtx
13663 legitimize_pic_address (rtx orig, rtx reg)
13665 rtx addr = orig;
13666 rtx new_rtx = orig;
13668 #if TARGET_MACHO
13669 if (TARGET_MACHO && !TARGET_64BIT)
13671 if (reg == 0)
13672 reg = gen_reg_rtx (Pmode);
13673 /* Use the generic Mach-O PIC machinery. */
13674 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13676 #endif
13678 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13680 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13681 if (tmp)
13682 return tmp;
13685 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13686 new_rtx = addr;
13687 else if (TARGET_64BIT && !TARGET_PECOFF
13688 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13690 rtx tmpreg;
13691 /* This symbol may be referenced via a displacement from the PIC
13692 base address (@GOTOFF). */
13694 set_pic_reg_ever_live ();
13695 if (GET_CODE (addr) == CONST)
13696 addr = XEXP (addr, 0);
13697 if (GET_CODE (addr) == PLUS)
13699 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13700 UNSPEC_GOTOFF);
13701 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13703 else
13704 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13705 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13706 if (!reg)
13707 tmpreg = gen_reg_rtx (Pmode);
13708 else
13709 tmpreg = reg;
13710 emit_move_insn (tmpreg, new_rtx);
13712 if (reg != 0)
13714 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13715 tmpreg, 1, OPTAB_DIRECT);
13716 new_rtx = reg;
13718 else
13719 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13721 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13723 /* This symbol may be referenced via a displacement from the PIC
13724 base address (@GOTOFF). */
13726 set_pic_reg_ever_live ();
13727 if (GET_CODE (addr) == CONST)
13728 addr = XEXP (addr, 0);
13729 if (GET_CODE (addr) == PLUS)
13731 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13732 UNSPEC_GOTOFF);
13733 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13735 else
13736 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13737 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13740 if (reg != 0)
13742 emit_move_insn (reg, new_rtx);
13743 new_rtx = reg;
13746 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13747 /* We can't use @GOTOFF for text labels on VxWorks;
13748 see gotoff_operand. */
13749 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13751 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13752 if (tmp)
13753 return tmp;
13755 /* For x64 PE-COFF there is no GOT table, so we use the address
13756 directly. */
13757 if (TARGET_64BIT && TARGET_PECOFF)
13759 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13760 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13762 if (reg == 0)
13763 reg = gen_reg_rtx (Pmode);
13764 emit_move_insn (reg, new_rtx);
13765 new_rtx = reg;
13767 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13769 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13770 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13771 new_rtx = gen_const_mem (Pmode, new_rtx);
13772 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13774 if (reg == 0)
13775 reg = gen_reg_rtx (Pmode);
13776 /* Use gen_movsi directly, otherwise the address is loaded
13777 into a register for CSE. We don't want to CSE these addresses;
13778 instead we CSE addresses from the GOT table, so skip this. */
13779 emit_insn (gen_movsi (reg, new_rtx));
13780 new_rtx = reg;
13782 else
13784 /* This symbol must be referenced via a load from the
13785 Global Offset Table (@GOT). */
13787 set_pic_reg_ever_live ();
13788 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13789 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13790 if (TARGET_64BIT)
13791 new_rtx = force_reg (Pmode, new_rtx);
13792 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13793 new_rtx = gen_const_mem (Pmode, new_rtx);
13794 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13796 if (reg == 0)
13797 reg = gen_reg_rtx (Pmode);
13798 emit_move_insn (reg, new_rtx);
13799 new_rtx = reg;
13802 else
13804 if (CONST_INT_P (addr)
13805 && !x86_64_immediate_operand (addr, VOIDmode))
13807 if (reg)
13809 emit_move_insn (reg, addr);
13810 new_rtx = reg;
13812 else
13813 new_rtx = force_reg (Pmode, addr);
13815 else if (GET_CODE (addr) == CONST)
13817 addr = XEXP (addr, 0);
13819 /* We must match stuff we generate before. Assume the only
13820 unspecs that can get here are ours. Not that we could do
13821 anything with them anyway.... */
13822 if (GET_CODE (addr) == UNSPEC
13823 || (GET_CODE (addr) == PLUS
13824 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13825 return orig;
13826 gcc_assert (GET_CODE (addr) == PLUS);
13828 if (GET_CODE (addr) == PLUS)
13830 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13832 /* Check first to see if this is a constant offset from a @GOTOFF
13833 symbol reference. */
13834 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13835 && CONST_INT_P (op1))
13837 if (!TARGET_64BIT)
13839 set_pic_reg_ever_live ();
13840 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13841 UNSPEC_GOTOFF);
13842 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13843 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13844 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13846 if (reg != 0)
13848 emit_move_insn (reg, new_rtx);
13849 new_rtx = reg;
13852 else
13854 if (INTVAL (op1) < -16*1024*1024
13855 || INTVAL (op1) >= 16*1024*1024)
13857 if (!x86_64_immediate_operand (op1, Pmode))
13858 op1 = force_reg (Pmode, op1);
13859 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13863 else
13865 rtx base = legitimize_pic_address (op0, reg);
13866 machine_mode mode = GET_MODE (base);
13867 new_rtx
13868 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13870 if (CONST_INT_P (new_rtx))
13872 if (INTVAL (new_rtx) < -16*1024*1024
13873 || INTVAL (new_rtx) >= 16*1024*1024)
13875 if (!x86_64_immediate_operand (new_rtx, mode))
13876 new_rtx = force_reg (mode, new_rtx);
13877 new_rtx
13878 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13880 else
13881 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13883 else
13885 /* For %rip addressing, we have to use just disp32, with
13886 neither base nor index. */
13887 if (TARGET_64BIT
13888 && (GET_CODE (base) == SYMBOL_REF
13889 || GET_CODE (base) == LABEL_REF))
13890 base = force_reg (mode, base);
13891 if (GET_CODE (new_rtx) == PLUS
13892 && CONSTANT_P (XEXP (new_rtx, 1)))
13894 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13895 new_rtx = XEXP (new_rtx, 1);
13897 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13902 return new_rtx;
13905 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13907 static rtx
13908 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13910 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13912 if (GET_MODE (tp) != tp_mode)
13914 gcc_assert (GET_MODE (tp) == SImode);
13915 gcc_assert (tp_mode == DImode);
13917 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13920 if (to_reg)
13921 tp = copy_to_mode_reg (tp_mode, tp);
13923 return tp;
13926 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13928 static GTY(()) rtx ix86_tls_symbol;
13930 static rtx
13931 ix86_tls_get_addr (void)
13933 if (!ix86_tls_symbol)
13935 const char *sym
13936 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13937 ? "___tls_get_addr" : "__tls_get_addr");
13939 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13942 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13944 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13945 UNSPEC_PLTOFF);
13946 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13947 gen_rtx_CONST (Pmode, unspec));
13950 return ix86_tls_symbol;
13953 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13955 static GTY(()) rtx ix86_tls_module_base_symbol;
13958 ix86_tls_module_base (void)
13960 if (!ix86_tls_module_base_symbol)
13962 ix86_tls_module_base_symbol
13963 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13965 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13966 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13969 return ix86_tls_module_base_symbol;
13972 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13973 false if we expect this to be used for a memory address and true if
13974 we expect to load the address into a register. */
13976 static rtx
13977 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13979 rtx dest, base, off;
13980 rtx pic = NULL_RTX, tp = NULL_RTX;
13981 machine_mode tp_mode = Pmode;
13982 int type;
13984 /* Fall back to the global dynamic model if the toolchain cannot support
13985 local dynamic. */
13986 if (TARGET_SUN_TLS && !TARGET_64BIT
13987 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13988 && model == TLS_MODEL_LOCAL_DYNAMIC)
13989 model = TLS_MODEL_GLOBAL_DYNAMIC;
13991 switch (model)
13993 case TLS_MODEL_GLOBAL_DYNAMIC:
13994 dest = gen_reg_rtx (Pmode);
13996 if (!TARGET_64BIT)
13998 if (flag_pic && !TARGET_PECOFF)
13999 pic = pic_offset_table_rtx;
14000 else
14002 pic = gen_reg_rtx (Pmode);
14003 emit_insn (gen_set_got (pic));
14007 if (TARGET_GNU2_TLS)
14009 if (TARGET_64BIT)
14010 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14011 else
14012 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14014 tp = get_thread_pointer (Pmode, true);
14015 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14017 if (GET_MODE (x) != Pmode)
14018 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14020 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14022 else
14024 rtx caddr = ix86_tls_get_addr ();
14026 if (TARGET_64BIT)
14028 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14029 rtx_insn *insns;
14031 start_sequence ();
14032 emit_call_insn
14033 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14034 insns = get_insns ();
14035 end_sequence ();
14037 if (GET_MODE (x) != Pmode)
14038 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14040 RTL_CONST_CALL_P (insns) = 1;
14041 emit_libcall_block (insns, dest, rax, x);
14043 else
14044 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14046 break;
14048 case TLS_MODEL_LOCAL_DYNAMIC:
14049 base = gen_reg_rtx (Pmode);
14051 if (!TARGET_64BIT)
14053 if (flag_pic)
14054 pic = pic_offset_table_rtx;
14055 else
14057 pic = gen_reg_rtx (Pmode);
14058 emit_insn (gen_set_got (pic));
14062 if (TARGET_GNU2_TLS)
14064 rtx tmp = ix86_tls_module_base ();
14066 if (TARGET_64BIT)
14067 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14068 else
14069 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14071 tp = get_thread_pointer (Pmode, true);
14072 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14073 gen_rtx_MINUS (Pmode, tmp, tp));
14075 else
14077 rtx caddr = ix86_tls_get_addr ();
14079 if (TARGET_64BIT)
14081 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14082 rtx_insn *insns;
14083 rtx eqv;
14085 start_sequence ();
14086 emit_call_insn
14087 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14088 insns = get_insns ();
14089 end_sequence ();
14091 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14092 share the LD_BASE result with other LD model accesses. */
14093 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14094 UNSPEC_TLS_LD_BASE);
14096 RTL_CONST_CALL_P (insns) = 1;
14097 emit_libcall_block (insns, base, rax, eqv);
14099 else
14100 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14103 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14104 off = gen_rtx_CONST (Pmode, off);
14106 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14108 if (TARGET_GNU2_TLS)
14110 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14112 if (GET_MODE (x) != Pmode)
14113 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14115 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14117 break;
14119 case TLS_MODEL_INITIAL_EXEC:
14120 if (TARGET_64BIT)
14122 if (TARGET_SUN_TLS && !TARGET_X32)
14124 /* The Sun linker took the AMD64 TLS spec literally
14125 and can only handle %rax as destination of the
14126 initial executable code sequence. */
14128 dest = gen_reg_rtx (DImode);
14129 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14130 return dest;
14133 /* Generate DImode references to avoid %fs:(%reg32)
14134 problems and linker IE->LE relaxation bug. */
14135 tp_mode = DImode;
14136 pic = NULL;
14137 type = UNSPEC_GOTNTPOFF;
14139 else if (flag_pic)
14141 set_pic_reg_ever_live ();
14142 pic = pic_offset_table_rtx;
14143 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14145 else if (!TARGET_ANY_GNU_TLS)
14147 pic = gen_reg_rtx (Pmode);
14148 emit_insn (gen_set_got (pic));
14149 type = UNSPEC_GOTTPOFF;
14151 else
14153 pic = NULL;
14154 type = UNSPEC_INDNTPOFF;
14157 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14158 off = gen_rtx_CONST (tp_mode, off);
14159 if (pic)
14160 off = gen_rtx_PLUS (tp_mode, pic, off);
14161 off = gen_const_mem (tp_mode, off);
14162 set_mem_alias_set (off, ix86_GOT_alias_set ());
14164 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14166 base = get_thread_pointer (tp_mode,
14167 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14168 off = force_reg (tp_mode, off);
14169 return gen_rtx_PLUS (tp_mode, base, off);
14171 else
14173 base = get_thread_pointer (Pmode, true);
14174 dest = gen_reg_rtx (Pmode);
14175 emit_insn (ix86_gen_sub3 (dest, base, off));
14177 break;
14179 case TLS_MODEL_LOCAL_EXEC:
14180 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14181 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14182 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14183 off = gen_rtx_CONST (Pmode, off);
14185 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14187 base = get_thread_pointer (Pmode,
14188 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14189 return gen_rtx_PLUS (Pmode, base, off);
14191 else
14193 base = get_thread_pointer (Pmode, true);
14194 dest = gen_reg_rtx (Pmode);
14195 emit_insn (ix86_gen_sub3 (dest, base, off));
14197 break;
14199 default:
14200 gcc_unreachable ();
14203 return dest;
14206 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14207 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14208 unique refptr-DECL symbol corresponding to symbol DECL. */
14210 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14212 static inline hashval_t hash (tree_map *m) { return m->hash; }
14213 static inline bool
14214 equal (tree_map *a, tree_map *b)
14216 return a->base.from == b->base.from;
14219 static void
14220 handle_cache_entry (tree_map *&m)
14222 extern void gt_ggc_mx (tree_map *&);
14223 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14224 return;
14225 else if (ggc_marked_p (m->base.from))
14226 gt_ggc_mx (m);
14227 else
14228 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14232 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14234 static tree
14235 get_dllimport_decl (tree decl, bool beimport)
14237 struct tree_map *h, in;
14238 const char *name;
14239 const char *prefix;
14240 size_t namelen, prefixlen;
14241 char *imp_name;
14242 tree to;
14243 rtx rtl;
14245 if (!dllimport_map)
14246 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14248 in.hash = htab_hash_pointer (decl);
14249 in.base.from = decl;
14250 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14251 h = *loc;
14252 if (h)
14253 return h->to;
14255 *loc = h = ggc_alloc<tree_map> ();
14256 h->hash = in.hash;
14257 h->base.from = decl;
14258 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14259 VAR_DECL, NULL, ptr_type_node);
14260 DECL_ARTIFICIAL (to) = 1;
14261 DECL_IGNORED_P (to) = 1;
14262 DECL_EXTERNAL (to) = 1;
14263 TREE_READONLY (to) = 1;
14265 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14266 name = targetm.strip_name_encoding (name);
14267 if (beimport)
14268 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14269 ? "*__imp_" : "*__imp__";
14270 else
14271 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14272 namelen = strlen (name);
14273 prefixlen = strlen (prefix);
14274 imp_name = (char *) alloca (namelen + prefixlen + 1);
14275 memcpy (imp_name, prefix, prefixlen);
14276 memcpy (imp_name + prefixlen, name, namelen + 1);
14278 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14279 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14280 SET_SYMBOL_REF_DECL (rtl, to);
14281 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14282 if (!beimport)
14284 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14285 #ifdef SUB_TARGET_RECORD_STUB
14286 SUB_TARGET_RECORD_STUB (name);
14287 #endif
14290 rtl = gen_const_mem (Pmode, rtl);
14291 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14293 SET_DECL_RTL (to, rtl);
14294 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14296 return to;
14299 /* Expand SYMBOL into its corresponding far-addressed symbol.
14300 WANT_REG is true if we require the result be a register. */
14302 static rtx
14303 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14305 tree imp_decl;
14306 rtx x;
14308 gcc_assert (SYMBOL_REF_DECL (symbol));
14309 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14311 x = DECL_RTL (imp_decl);
14312 if (want_reg)
14313 x = force_reg (Pmode, x);
14314 return x;
14317 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14318 true if we require the result be a register. */
14320 static rtx
14321 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14323 tree imp_decl;
14324 rtx x;
14326 gcc_assert (SYMBOL_REF_DECL (symbol));
14327 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14329 x = DECL_RTL (imp_decl);
14330 if (want_reg)
14331 x = force_reg (Pmode, x);
14332 return x;
14335 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14336 is true if we require the result be a register. */
14338 static rtx
14339 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14341 if (!TARGET_PECOFF)
14342 return NULL_RTX;
14344 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14346 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14347 return legitimize_dllimport_symbol (addr, inreg);
14348 if (GET_CODE (addr) == CONST
14349 && GET_CODE (XEXP (addr, 0)) == PLUS
14350 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14351 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14353 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14354 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14358 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14359 return NULL_RTX;
14360 if (GET_CODE (addr) == SYMBOL_REF
14361 && !is_imported_p (addr)
14362 && SYMBOL_REF_EXTERNAL_P (addr)
14363 && SYMBOL_REF_DECL (addr))
14364 return legitimize_pe_coff_extern_decl (addr, inreg);
14366 if (GET_CODE (addr) == CONST
14367 && GET_CODE (XEXP (addr, 0)) == PLUS
14368 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14369 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14370 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14371 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14373 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14374 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14376 return NULL_RTX;
14379 /* Try machine-dependent ways of modifying an illegitimate address
14380 to be legitimate. If we find one, return the new, valid address.
14381 This macro is used in only one place: `memory_address' in explow.c.
14383 OLDX is the address as it was before break_out_memory_refs was called.
14384 In some cases it is useful to look at this to decide what needs to be done.
14386 It is always safe for this macro to do nothing. It exists to recognize
14387 opportunities to optimize the output.
14389 For the 80386, we handle X+REG by loading X into a register R and
14390 using R+REG. R will go in a general reg and indexing will be used.
14391 However, if REG is a broken-out memory address or multiplication,
14392 nothing needs to be done because REG can certainly go in a general reg.
14394 When -fpic is used, special handling is needed for symbolic references.
14395 See comments by legitimize_pic_address in i386.c for details. */
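/* As a rough example, an address of the form (plus (symbol_ref "x") (reg))
   is typically rewritten below by forcing the symbolic part into a fresh
   register R, yielding the indexable form (plus R (reg)).  */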
14397 static rtx
14398 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14400 bool changed = false;
14401 unsigned log;
14403 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14404 if (log)
14405 return legitimize_tls_address (x, (enum tls_model) log, false);
14406 if (GET_CODE (x) == CONST
14407 && GET_CODE (XEXP (x, 0)) == PLUS
14408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14409 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14411 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14412 (enum tls_model) log, false);
14413 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14416 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14418 rtx tmp = legitimize_pe_coff_symbol (x, true);
14419 if (tmp)
14420 return tmp;
14423 if (flag_pic && SYMBOLIC_CONST (x))
14424 return legitimize_pic_address (x, 0);
14426 #if TARGET_MACHO
14427 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14428 return machopic_indirect_data_reference (x, 0);
14429 #endif
14431 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14432 if (GET_CODE (x) == ASHIFT
14433 && CONST_INT_P (XEXP (x, 1))
14434 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14436 changed = true;
14437 log = INTVAL (XEXP (x, 1));
14438 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14439 GEN_INT (1 << log));
14442 if (GET_CODE (x) == PLUS)
14444 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14446 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14447 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14448 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14450 changed = true;
14451 log = INTVAL (XEXP (XEXP (x, 0), 1));
14452 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14453 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14454 GEN_INT (1 << log));
14457 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14458 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14459 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14461 changed = true;
14462 log = INTVAL (XEXP (XEXP (x, 1), 1));
14463 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14464 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14465 GEN_INT (1 << log));
14468 /* Put multiply first if it isn't already. */
14469 if (GET_CODE (XEXP (x, 1)) == MULT)
14471 std::swap (XEXP (x, 0), XEXP (x, 1));
14472 changed = true;
14475 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14476 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14477 created by virtual register instantiation, register elimination, and
14478 similar optimizations. */
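/* For example, (plus (mult (reg A) (const_int 4)) (plus (reg B) (const_int 8)))
   becomes (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8)),
   which matches the base + index*scale + displacement addressing form.  */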
14479 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14481 changed = true;
14482 x = gen_rtx_PLUS (Pmode,
14483 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14484 XEXP (XEXP (x, 1), 0)),
14485 XEXP (XEXP (x, 1), 1));
14488 /* Canonicalize
14489 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14490 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14491 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14492 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14493 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14494 && CONSTANT_P (XEXP (x, 1)))
14496 rtx constant;
14497 rtx other = NULL_RTX;
14499 if (CONST_INT_P (XEXP (x, 1)))
14501 constant = XEXP (x, 1);
14502 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14504 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14506 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14507 other = XEXP (x, 1);
14509 else
14510 constant = 0;
14512 if (constant)
14514 changed = true;
14515 x = gen_rtx_PLUS (Pmode,
14516 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14517 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14518 plus_constant (Pmode, other,
14519 INTVAL (constant)));
14523 if (changed && ix86_legitimate_address_p (mode, x, false))
14524 return x;
14526 if (GET_CODE (XEXP (x, 0)) == MULT)
14528 changed = true;
14529 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14532 if (GET_CODE (XEXP (x, 1)) == MULT)
14534 changed = true;
14535 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14538 if (changed
14539 && REG_P (XEXP (x, 1))
14540 && REG_P (XEXP (x, 0)))
14541 return x;
14543 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14545 changed = true;
14546 x = legitimize_pic_address (x, 0);
14549 if (changed && ix86_legitimate_address_p (mode, x, false))
14550 return x;
14552 if (REG_P (XEXP (x, 0)))
14554 rtx temp = gen_reg_rtx (Pmode);
14555 rtx val = force_operand (XEXP (x, 1), temp);
14556 if (val != temp)
14558 val = convert_to_mode (Pmode, val, 1);
14559 emit_move_insn (temp, val);
14562 XEXP (x, 1) = temp;
14563 return x;
14566 else if (REG_P (XEXP (x, 1)))
14568 rtx temp = gen_reg_rtx (Pmode);
14569 rtx val = force_operand (XEXP (x, 0), temp);
14570 if (val != temp)
14572 val = convert_to_mode (Pmode, val, 1);
14573 emit_move_insn (temp, val);
14576 XEXP (x, 0) = temp;
14577 return x;
14581 return x;
14584 /* Print an integer constant expression in assembler syntax. Addition
14585 and subtraction are the only arithmetic that may appear in these
14586 expressions. FILE is the stdio stream to write to, X is the rtx, and
14587 CODE is the operand print code from the output string. */
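/* For instance, with -fpic on ia32 a GOT-relative reference to foo is
   printed as "foo@GOTOFF", and a call through the PLT (operand code 'P')
   as "foo@PLT"; see the UNSPEC handling below.  */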
14589 static void
14590 output_pic_addr_const (FILE *file, rtx x, int code)
14592 char buf[256];
14594 switch (GET_CODE (x))
14596 case PC:
14597 gcc_assert (flag_pic);
14598 putc ('.', file);
14599 break;
14601 case SYMBOL_REF:
14602 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14603 output_addr_const (file, x);
14604 else
14606 const char *name = XSTR (x, 0);
14608 /* Mark the decl as referenced so that cgraph will
14609 output the function. */
14610 if (SYMBOL_REF_DECL (x))
14611 mark_decl_referenced (SYMBOL_REF_DECL (x));
14613 #if TARGET_MACHO
14614 if (MACHOPIC_INDIRECT
14615 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14616 name = machopic_indirection_name (x, /*stub_p=*/true);
14617 #endif
14618 assemble_name (file, name);
14620 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14621 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14622 fputs ("@PLT", file);
14623 break;
14625 case LABEL_REF:
14626 x = XEXP (x, 0);
14627 /* FALLTHRU */
14628 case CODE_LABEL:
14629 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14630 assemble_name (asm_out_file, buf);
14631 break;
14633 case CONST_INT:
14634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14635 break;
14637 case CONST:
14638 /* This used to output parentheses around the expression,
14639 but that does not work on the 386 (either ATT or BSD assembler). */
14640 output_pic_addr_const (file, XEXP (x, 0), code);
14641 break;
14643 case CONST_DOUBLE:
14644 if (GET_MODE (x) == VOIDmode)
14646 /* We can use %d if the number is <32 bits and positive. */
14647 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14648 fprintf (file, "0x%lx%08lx",
14649 (unsigned long) CONST_DOUBLE_HIGH (x),
14650 (unsigned long) CONST_DOUBLE_LOW (x));
14651 else
14652 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14654 else
14655 /* We can't handle floating point constants;
14656 TARGET_PRINT_OPERAND must handle them. */
14657 output_operand_lossage ("floating constant misused");
14658 break;
14660 case PLUS:
14661 /* Some assemblers need integer constants to appear first. */
14662 if (CONST_INT_P (XEXP (x, 0)))
14664 output_pic_addr_const (file, XEXP (x, 0), code);
14665 putc ('+', file);
14666 output_pic_addr_const (file, XEXP (x, 1), code);
14668 else
14670 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14671 output_pic_addr_const (file, XEXP (x, 1), code);
14672 putc ('+', file);
14673 output_pic_addr_const (file, XEXP (x, 0), code);
14675 break;
14677 case MINUS:
14678 if (!TARGET_MACHO)
14679 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14680 output_pic_addr_const (file, XEXP (x, 0), code);
14681 putc ('-', file);
14682 output_pic_addr_const (file, XEXP (x, 1), code);
14683 if (!TARGET_MACHO)
14684 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14685 break;
14687 case UNSPEC:
14688 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14690 bool f = i386_asm_output_addr_const_extra (file, x);
14691 gcc_assert (f);
14692 break;
14695 gcc_assert (XVECLEN (x, 0) == 1);
14696 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14697 switch (XINT (x, 1))
14699 case UNSPEC_GOT:
14700 fputs ("@GOT", file);
14701 break;
14702 case UNSPEC_GOTOFF:
14703 fputs ("@GOTOFF", file);
14704 break;
14705 case UNSPEC_PLTOFF:
14706 fputs ("@PLTOFF", file);
14707 break;
14708 case UNSPEC_PCREL:
14709 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14710 "(%rip)" : "[rip]", file);
14711 break;
14712 case UNSPEC_GOTPCREL:
14713 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14714 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14715 break;
14716 case UNSPEC_GOTTPOFF:
14717 /* FIXME: This might be @TPOFF in Sun ld too. */
14718 fputs ("@gottpoff", file);
14719 break;
14720 case UNSPEC_TPOFF:
14721 fputs ("@tpoff", file);
14722 break;
14723 case UNSPEC_NTPOFF:
14724 if (TARGET_64BIT)
14725 fputs ("@tpoff", file);
14726 else
14727 fputs ("@ntpoff", file);
14728 break;
14729 case UNSPEC_DTPOFF:
14730 fputs ("@dtpoff", file);
14731 break;
14732 case UNSPEC_GOTNTPOFF:
14733 if (TARGET_64BIT)
14734 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14735 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14736 else
14737 fputs ("@gotntpoff", file);
14738 break;
14739 case UNSPEC_INDNTPOFF:
14740 fputs ("@indntpoff", file);
14741 break;
14742 #if TARGET_MACHO
14743 case UNSPEC_MACHOPIC_OFFSET:
14744 putc ('-', file);
14745 machopic_output_function_base_name (file);
14746 break;
14747 #endif
14748 default:
14749 output_operand_lossage ("invalid UNSPEC as operand");
14750 break;
14752 break;
14754 default:
14755 output_operand_lossage ("invalid expression as operand");
14759 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14760 We need to emit DTP-relative relocations. */
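/* Roughly, for a 4-byte entry this emits ".long foo@dtpoff"; for an 8-byte
   entry a zero upper half is appended, giving ".long foo@dtpoff, 0".  */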
14762 static void ATTRIBUTE_UNUSED
14763 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14765 fputs (ASM_LONG, file);
14766 output_addr_const (file, x);
14767 fputs ("@dtpoff", file);
14768 switch (size)
14770 case 4:
14771 break;
14772 case 8:
14773 fputs (", 0", file);
14774 break;
14775 default:
14776 gcc_unreachable ();
14780 /* Return true if X is a representation of the PIC register. This copes
14781 with calls from ix86_find_base_term, where the register might have
14782 been replaced by a cselib value. */
14784 static bool
14785 ix86_pic_register_p (rtx x)
14787 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14788 return (pic_offset_table_rtx
14789 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14790 else if (!REG_P (x))
14791 return false;
14792 else if (pic_offset_table_rtx)
14794 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14795 return true;
14796 if (HARD_REGISTER_P (x)
14797 && !HARD_REGISTER_P (pic_offset_table_rtx)
14798 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14799 return true;
14800 return false;
14802 else
14803 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14806 /* Helper function for ix86_delegitimize_address.
14807 Attempt to delegitimize TLS local-exec accesses. */
14809 static rtx
14810 ix86_delegitimize_tls_address (rtx orig_x)
14812 rtx x = orig_x, unspec;
14813 struct ix86_address addr;
14815 if (!TARGET_TLS_DIRECT_SEG_REFS)
14816 return orig_x;
14817 if (MEM_P (x))
14818 x = XEXP (x, 0);
14819 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14820 return orig_x;
14821 if (ix86_decompose_address (x, &addr) == 0
14822 || addr.seg != DEFAULT_TLS_SEG_REG
14823 || addr.disp == NULL_RTX
14824 || GET_CODE (addr.disp) != CONST)
14825 return orig_x;
14826 unspec = XEXP (addr.disp, 0);
14827 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14828 unspec = XEXP (unspec, 0);
14829 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14830 return orig_x;
14831 x = XVECEXP (unspec, 0, 0);
14832 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14833 if (unspec != XEXP (addr.disp, 0))
14834 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14835 if (addr.index)
14837 rtx idx = addr.index;
14838 if (addr.scale != 1)
14839 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14840 x = gen_rtx_PLUS (Pmode, idx, x);
14842 if (addr.base)
14843 x = gen_rtx_PLUS (Pmode, addr.base, x);
14844 if (MEM_P (orig_x))
14845 x = replace_equiv_address_nv (orig_x, x);
14846 return x;
14849 /* In the name of slightly smaller debug output, and to cater to
14850 general assembler lossage, recognize PIC+GOTOFF and turn it back
14851 into a direct symbol reference.
14853 On Darwin, this is necessary to avoid a crash, because Darwin
14854 has a different PIC label for each routine but the DWARF debugging
14855 information is not associated with any particular routine, so it's
14856 necessary to remove references to the PIC label from RTL stored by
14857 the DWARF output code. */
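/* For example, an address such as (plus (reg ebx) (const (unspec [foo]
   UNSPEC_GOTOFF))) is turned back into a plain reference to foo.  */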
14859 static rtx
14860 ix86_delegitimize_address (rtx x)
14862 rtx orig_x = delegitimize_mem_from_attrs (x);
14863 /* addend is NULL or some rtx if x is something+GOTOFF where
14864 something doesn't include the PIC register. */
14865 rtx addend = NULL_RTX;
14866 /* reg_addend is NULL or a multiple of some register. */
14867 rtx reg_addend = NULL_RTX;
14868 /* const_addend is NULL or a const_int. */
14869 rtx const_addend = NULL_RTX;
14870 /* This is the result, or NULL. */
14871 rtx result = NULL_RTX;
14873 x = orig_x;
14875 if (MEM_P (x))
14876 x = XEXP (x, 0);
14878 if (TARGET_64BIT)
14880 if (GET_CODE (x) == CONST
14881 && GET_CODE (XEXP (x, 0)) == PLUS
14882 && GET_MODE (XEXP (x, 0)) == Pmode
14883 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14884 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14885 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14887 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14888 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14889 if (MEM_P (orig_x))
14890 x = replace_equiv_address_nv (orig_x, x);
14891 return x;
14894 if (GET_CODE (x) == CONST
14895 && GET_CODE (XEXP (x, 0)) == UNSPEC
14896 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14897 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14898 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14900 x = XVECEXP (XEXP (x, 0), 0, 0);
14901 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14903 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14904 GET_MODE (x), 0);
14905 if (x == NULL_RTX)
14906 return orig_x;
14908 return x;
14911 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14912 return ix86_delegitimize_tls_address (orig_x);
14914 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14915 and -mcmodel=medium -fpic. */
14918 if (GET_CODE (x) != PLUS
14919 || GET_CODE (XEXP (x, 1)) != CONST)
14920 return ix86_delegitimize_tls_address (orig_x);
14922 if (ix86_pic_register_p (XEXP (x, 0)))
14923 /* %ebx + GOT/GOTOFF */
14925 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14927 /* %ebx + %reg * scale + GOT/GOTOFF */
14928 reg_addend = XEXP (x, 0);
14929 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14930 reg_addend = XEXP (reg_addend, 1);
14931 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14932 reg_addend = XEXP (reg_addend, 0);
14933 else
14935 reg_addend = NULL_RTX;
14936 addend = XEXP (x, 0);
14939 else
14940 addend = XEXP (x, 0);
14942 x = XEXP (XEXP (x, 1), 0);
14943 if (GET_CODE (x) == PLUS
14944 && CONST_INT_P (XEXP (x, 1)))
14946 const_addend = XEXP (x, 1);
14947 x = XEXP (x, 0);
14950 if (GET_CODE (x) == UNSPEC
14951 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14952 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14953 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14954 && !MEM_P (orig_x) && !addend)))
14955 result = XVECEXP (x, 0, 0);
14957 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14958 && !MEM_P (orig_x))
14959 result = XVECEXP (x, 0, 0);
14961 if (! result)
14962 return ix86_delegitimize_tls_address (orig_x);
14964 if (const_addend)
14965 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14966 if (reg_addend)
14967 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14968 if (addend)
14970 /* If the rest of original X doesn't involve the PIC register, add
14971 addend and subtract pic_offset_table_rtx. This can happen e.g.
14972 for code like:
14973 leal (%ebx, %ecx, 4), %ecx
14975 movl foo@GOTOFF(%ecx), %edx
14976 in which case we return (%ecx - %ebx) + foo
14977 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14978 and reload has completed. */
14979 if (pic_offset_table_rtx
14980 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14981 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14982 pic_offset_table_rtx),
14983 result);
14984 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14986 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14987 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14988 result = gen_rtx_PLUS (Pmode, tmp, result);
14990 else
14991 return orig_x;
14993 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14995 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14996 if (result == NULL_RTX)
14997 return orig_x;
14999 return result;
15002 /* If X is a machine specific address (i.e. a symbol or label being
15003 referenced as a displacement from the GOT implemented using an
15004 UNSPEC), then return the base term. Otherwise return X. */
15007 ix86_find_base_term (rtx x)
15009 rtx term;
15011 if (TARGET_64BIT)
15013 if (GET_CODE (x) != CONST)
15014 return x;
15015 term = XEXP (x, 0);
15016 if (GET_CODE (term) == PLUS
15017 && (CONST_INT_P (XEXP (term, 1))
15018 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15019 term = XEXP (term, 0);
15020 if (GET_CODE (term) != UNSPEC
15021 || (XINT (term, 1) != UNSPEC_GOTPCREL
15022 && XINT (term, 1) != UNSPEC_PCREL))
15023 return x;
15025 return XVECEXP (term, 0, 0);
15028 return ix86_delegitimize_address (x);
15031 static void
15032 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15033 bool fp, FILE *file)
15035 const char *suffix;
15037 if (mode == CCFPmode || mode == CCFPUmode)
15039 code = ix86_fp_compare_code_to_integer (code);
15040 mode = CCmode;
15042 if (reverse)
15043 code = reverse_condition (code);
15045 switch (code)
15047 case EQ:
15048 switch (mode)
15050 case CCAmode:
15051 suffix = "a";
15052 break;
15054 case CCCmode:
15055 suffix = "c";
15056 break;
15058 case CCOmode:
15059 suffix = "o";
15060 break;
15062 case CCSmode:
15063 suffix = "s";
15064 break;
15066 default:
15067 suffix = "e";
15069 break;
15070 case NE:
15071 switch (mode)
15073 case CCAmode:
15074 suffix = "na";
15075 break;
15077 case CCCmode:
15078 suffix = "nc";
15079 break;
15081 case CCOmode:
15082 suffix = "no";
15083 break;
15085 case CCSmode:
15086 suffix = "ns";
15087 break;
15089 default:
15090 suffix = "ne";
15092 break;
15093 case GT:
15094 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15095 suffix = "g";
15096 break;
15097 case GTU:
15098 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15099 Those same assemblers have the same but opposite lossage on cmov. */
15100 if (mode == CCmode)
15101 suffix = fp ? "nbe" : "a";
15102 else
15103 gcc_unreachable ();
15104 break;
15105 case LT:
15106 switch (mode)
15108 case CCNOmode:
15109 case CCGOCmode:
15110 suffix = "s";
15111 break;
15113 case CCmode:
15114 case CCGCmode:
15115 suffix = "l";
15116 break;
15118 default:
15119 gcc_unreachable ();
15121 break;
15122 case LTU:
15123 if (mode == CCmode)
15124 suffix = "b";
15125 else if (mode == CCCmode)
15126 suffix = fp ? "b" : "c";
15127 else
15128 gcc_unreachable ();
15129 break;
15130 case GE:
15131 switch (mode)
15133 case CCNOmode:
15134 case CCGOCmode:
15135 suffix = "ns";
15136 break;
15138 case CCmode:
15139 case CCGCmode:
15140 suffix = "ge";
15141 break;
15143 default:
15144 gcc_unreachable ();
15146 break;
15147 case GEU:
15148 if (mode == CCmode)
15149 suffix = "nb";
15150 else if (mode == CCCmode)
15151 suffix = fp ? "nb" : "nc";
15152 else
15153 gcc_unreachable ();
15154 break;
15155 case LE:
15156 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15157 suffix = "le";
15158 break;
15159 case LEU:
15160 if (mode == CCmode)
15161 suffix = "be";
15162 else
15163 gcc_unreachable ();
15164 break;
15165 case UNORDERED:
15166 suffix = fp ? "u" : "p";
15167 break;
15168 case ORDERED:
15169 suffix = fp ? "nu" : "np";
15170 break;
15171 default:
15172 gcc_unreachable ();
15174 fputs (suffix, file);
15177 /* Print the name of register X to FILE based on its machine mode and number.
15178 If CODE is 'w', pretend the mode is HImode.
15179 If CODE is 'b', pretend the mode is QImode.
15180 If CODE is 'k', pretend the mode is SImode.
15181 If CODE is 'q', pretend the mode is DImode.
15182 If CODE is 'x', pretend the mode is V4SFmode.
15183 If CODE is 't', pretend the mode is V8SFmode.
15184 If CODE is 'g', pretend the mode is V16SFmode.
15185 If CODE is 'h', pretend the reg is the 'high' byte register.
15186 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15187 If CODE is 'd', duplicate the operand for AVX instruction.
15190 void
15191 print_reg (rtx x, int code, FILE *file)
15193 const char *reg;
15194 unsigned int regno;
15195 bool duplicated = code == 'd' && TARGET_AVX;
15197 if (ASSEMBLER_DIALECT == ASM_ATT)
15198 putc ('%', file);
15200 if (x == pc_rtx)
15202 gcc_assert (TARGET_64BIT);
15203 fputs ("rip", file);
15204 return;
15207 regno = true_regnum (x);
15208 gcc_assert (regno != ARG_POINTER_REGNUM
15209 && regno != FRAME_POINTER_REGNUM
15210 && regno != FLAGS_REG
15211 && regno != FPSR_REG
15212 && regno != FPCR_REG);
15214 if (code == 'w' || MMX_REG_P (x))
15215 code = 2;
15216 else if (code == 'b')
15217 code = 1;
15218 else if (code == 'k')
15219 code = 4;
15220 else if (code == 'q')
15221 code = 8;
15222 else if (code == 'y')
15223 code = 3;
15224 else if (code == 'h')
15225 code = 0;
15226 else if (code == 'x')
15227 code = 16;
15228 else if (code == 't')
15229 code = 32;
15230 else if (code == 'g')
15231 code = 64;
15232 else
15233 code = GET_MODE_SIZE (GET_MODE (x));
15235 /* Irritatingly, AMD extended registers use a different naming convention
15236 from the normal registers: "r%d[bwd]". */
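/* E.g. the first extended register is printed as r8b, r8w, r8d or r8,
   depending on the requested operand size.  */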
15237 if (REX_INT_REGNO_P (regno))
15239 gcc_assert (TARGET_64BIT);
15240 putc ('r', file);
15241 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15242 switch (code)
15244 case 0:
15245 error ("extended registers have no high halves");
15246 break;
15247 case 1:
15248 putc ('b', file);
15249 break;
15250 case 2:
15251 putc ('w', file);
15252 break;
15253 case 4:
15254 putc ('d', file);
15255 break;
15256 case 8:
15257 /* no suffix */
15258 break;
15259 default:
15260 error ("unsupported operand size for extended register");
15261 break;
15263 return;
15266 reg = NULL;
15267 switch (code)
15269 case 3:
15270 if (STACK_TOP_P (x))
15272 reg = "st(0)";
15273 break;
15275 /* FALLTHRU */
15276 case 8:
15277 case 4:
15278 case 12:
15279 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15280 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15281 /* FALLTHRU */
15282 case 16:
15283 case 2:
15284 normal:
15285 reg = hi_reg_name[regno];
15286 break;
15287 case 1:
15288 if (regno >= ARRAY_SIZE (qi_reg_name))
15289 goto normal;
15290 reg = qi_reg_name[regno];
15291 break;
15292 case 0:
15293 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15294 goto normal;
15295 reg = qi_high_reg_name[regno];
15296 break;
15297 case 32:
15298 if (SSE_REG_P (x))
15300 gcc_assert (!duplicated);
15301 putc ('y', file);
15302 fputs (hi_reg_name[regno] + 1, file);
15303 return;
15305 case 64:
15306 if (SSE_REG_P (x))
15308 gcc_assert (!duplicated);
15309 putc ('z', file);
15310 fputs (hi_reg_name[REGNO (x)] + 1, file);
15311 return;
15313 break;
15314 default:
15315 gcc_unreachable ();
15318 fputs (reg, file);
15319 if (duplicated)
15321 if (ASSEMBLER_DIALECT == ASM_ATT)
15322 fprintf (file, ", %%%s", reg);
15323 else
15324 fprintf (file, ", %s", reg);
15328 /* Meaning of CODE:
15329 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15330 C -- print opcode suffix for set/cmov insn.
15331 c -- like C, but print reversed condition
15332 F,f -- likewise, but for floating-point.
15333 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15334 otherwise nothing
15335 R -- print embedded rounding and sae.
15336 r -- print only sae.
15337 z -- print the opcode suffix for the size of the current operand.
15338 Z -- likewise, with special suffixes for x87 instructions.
15339 * -- print a star (in certain assembler syntax)
15340 A -- print an absolute memory reference.
15341 E -- print address with DImode register names if TARGET_64BIT.
15342 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15343 s -- print a shift double count, followed by the assembler's argument
15344 delimiter.
15345 b -- print the QImode name of the register for the indicated operand.
15346 %b0 would print %al if operands[0] is reg 0.
15347 w -- likewise, print the HImode name of the register.
15348 k -- likewise, print the SImode name of the register.
15349 q -- likewise, print the DImode name of the register.
15350 x -- likewise, print the V4SFmode name of the register.
15351 t -- likewise, print the V8SFmode name of the register.
15352 g -- likewise, print the V16SFmode name of the register.
15353 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15354 y -- print "st(0)" instead of "st" as a register.
15355 d -- print duplicated register operand for AVX instruction.
15356 D -- print condition for SSE cmp instruction.
15357 P -- if PIC, print an @PLT suffix.
15358 p -- print raw symbol name.
15359 X -- don't print any sort of PIC '@' suffix for a symbol.
15360 & -- print some in-use local-dynamic symbol name.
15361 H -- print a memory address offset by 8; used for sse high-parts
15362 Y -- print condition for XOP pcom* instruction.
15363 + -- print a branch hint as 'cs' or 'ds' prefix
15364 ; -- print a semicolon (after prefixes due to bug in older gas).
15365 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15366 @ -- print a segment register of thread base pointer load
15367 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15368 ! -- print MPX prefix for jxx/call/ret instructions if required.
15371 void
15372 ix86_print_operand (FILE *file, rtx x, int code)
15374 if (code)
15376 switch (code)
15378 case 'A':
15379 switch (ASSEMBLER_DIALECT)
15381 case ASM_ATT:
15382 putc ('*', file);
15383 break;
15385 case ASM_INTEL:
15386 /* Intel syntax. For absolute addresses, registers should not
15387 be surrounded by brackets. */
15388 if (!REG_P (x))
15390 putc ('[', file);
15391 ix86_print_operand (file, x, 0);
15392 putc (']', file);
15393 return;
15395 break;
15397 default:
15398 gcc_unreachable ();
15401 ix86_print_operand (file, x, 0);
15402 return;
15404 case 'E':
15405 /* Wrap address in an UNSPEC to declare special handling. */
15406 if (TARGET_64BIT)
15407 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15409 output_address (x);
15410 return;
15412 case 'L':
15413 if (ASSEMBLER_DIALECT == ASM_ATT)
15414 putc ('l', file);
15415 return;
15417 case 'W':
15418 if (ASSEMBLER_DIALECT == ASM_ATT)
15419 putc ('w', file);
15420 return;
15422 case 'B':
15423 if (ASSEMBLER_DIALECT == ASM_ATT)
15424 putc ('b', file);
15425 return;
15427 case 'Q':
15428 if (ASSEMBLER_DIALECT == ASM_ATT)
15429 putc ('l', file);
15430 return;
15432 case 'S':
15433 if (ASSEMBLER_DIALECT == ASM_ATT)
15434 putc ('s', file);
15435 return;
15437 case 'T':
15438 if (ASSEMBLER_DIALECT == ASM_ATT)
15439 putc ('t', file);
15440 return;
15442 case 'O':
15443 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15444 if (ASSEMBLER_DIALECT != ASM_ATT)
15445 return;
15447 switch (GET_MODE_SIZE (GET_MODE (x)))
15449 case 2:
15450 putc ('w', file);
15451 break;
15453 case 4:
15454 putc ('l', file);
15455 break;
15457 case 8:
15458 putc ('q', file);
15459 break;
15461 default:
15462 output_operand_lossage
15463 ("invalid operand size for operand code 'O'");
15464 return;
15467 putc ('.', file);
15468 #endif
15469 return;
15471 case 'z':
15472 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15474 /* Opcodes don't get size suffixes if using Intel syntax. */
15475 if (ASSEMBLER_DIALECT == ASM_INTEL)
15476 return;
15478 switch (GET_MODE_SIZE (GET_MODE (x)))
15480 case 1:
15481 putc ('b', file);
15482 return;
15484 case 2:
15485 putc ('w', file);
15486 return;
15488 case 4:
15489 putc ('l', file);
15490 return;
15492 case 8:
15493 putc ('q', file);
15494 return;
15496 default:
15497 output_operand_lossage
15498 ("invalid operand size for operand code 'z'");
15499 return;
15503 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15504 warning
15505 (0, "non-integer operand used with operand code 'z'");
15506 /* FALLTHRU */
15508 case 'Z':
15509 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15510 if (ASSEMBLER_DIALECT == ASM_INTEL)
15511 return;
15513 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15515 switch (GET_MODE_SIZE (GET_MODE (x)))
15517 case 2:
15518 #ifdef HAVE_AS_IX86_FILDS
15519 putc ('s', file);
15520 #endif
15521 return;
15523 case 4:
15524 putc ('l', file);
15525 return;
15527 case 8:
15528 #ifdef HAVE_AS_IX86_FILDQ
15529 putc ('q', file);
15530 #else
15531 fputs ("ll", file);
15532 #endif
15533 return;
15535 default:
15536 break;
15539 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15541 /* 387 opcodes don't get size suffixes
15542 if the operands are registers. */
15543 if (STACK_REG_P (x))
15544 return;
15546 switch (GET_MODE_SIZE (GET_MODE (x)))
15548 case 4:
15549 putc ('s', file);
15550 return;
15552 case 8:
15553 putc ('l', file);
15554 return;
15556 case 12:
15557 case 16:
15558 putc ('t', file);
15559 return;
15561 default:
15562 break;
15565 else
15567 output_operand_lossage
15568 ("invalid operand type used with operand code 'Z'");
15569 return;
15572 output_operand_lossage
15573 ("invalid operand size for operand code 'Z'");
15574 return;
15576 case 'd':
15577 case 'b':
15578 case 'w':
15579 case 'k':
15580 case 'q':
15581 case 'h':
15582 case 't':
15583 case 'g':
15584 case 'y':
15585 case 'x':
15586 case 'X':
15587 case 'P':
15588 case 'p':
15589 break;
15591 case 's':
15592 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15594 ix86_print_operand (file, x, 0);
15595 fputs (", ", file);
15597 return;
15599 case 'Y':
15600 switch (GET_CODE (x))
15602 case NE:
15603 fputs ("neq", file);
15604 break;
15605 case EQ:
15606 fputs ("eq", file);
15607 break;
15608 case GE:
15609 case GEU:
15610 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15611 break;
15612 case GT:
15613 case GTU:
15614 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15615 break;
15616 case LE:
15617 case LEU:
15618 fputs ("le", file);
15619 break;
15620 case LT:
15621 case LTU:
15622 fputs ("lt", file);
15623 break;
15624 case UNORDERED:
15625 fputs ("unord", file);
15626 break;
15627 case ORDERED:
15628 fputs ("ord", file);
15629 break;
15630 case UNEQ:
15631 fputs ("ueq", file);
15632 break;
15633 case UNGE:
15634 fputs ("nlt", file);
15635 break;
15636 case UNGT:
15637 fputs ("nle", file);
15638 break;
15639 case UNLE:
15640 fputs ("ule", file);
15641 break;
15642 case UNLT:
15643 fputs ("ult", file);
15644 break;
15645 case LTGT:
15646 fputs ("une", file);
15647 break;
15648 default:
15649 output_operand_lossage ("operand is not a condition code, "
15650 "invalid operand code 'Y'");
15651 return;
15653 return;
15655 case 'D':
15656 /* A little bit of braindamage here: the SSE compare instructions
15657 use completely different names for the comparisons than the
15658 fp conditional moves do. */
15659 switch (GET_CODE (x))
15661 case UNEQ:
15662 if (TARGET_AVX)
15664 fputs ("eq_us", file);
15665 break;
15667 case EQ:
15668 fputs ("eq", file);
15669 break;
15670 case UNLT:
15671 if (TARGET_AVX)
15673 fputs ("nge", file);
15674 break;
15676 case LT:
15677 fputs ("lt", file);
15678 break;
15679 case UNLE:
15680 if (TARGET_AVX)
15682 fputs ("ngt", file);
15683 break;
15685 case LE:
15686 fputs ("le", file);
15687 break;
15688 case UNORDERED:
15689 fputs ("unord", file);
15690 break;
15691 case LTGT:
15692 if (TARGET_AVX)
15694 fputs ("neq_oq", file);
15695 break;
15697 case NE:
15698 fputs ("neq", file);
15699 break;
15700 case GE:
15701 if (TARGET_AVX)
15703 fputs ("ge", file);
15704 break;
15706 case UNGE:
15707 fputs ("nlt", file);
15708 break;
15709 case GT:
15710 if (TARGET_AVX)
15712 fputs ("gt", file);
15713 break;
15715 case UNGT:
15716 fputs ("nle", file);
15717 break;
15718 case ORDERED:
15719 fputs ("ord", file);
15720 break;
15721 default:
15722 output_operand_lossage ("operand is not a condition code, "
15723 "invalid operand code 'D'");
15724 return;
15726 return;
15728 case 'F':
15729 case 'f':
15730 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15731 if (ASSEMBLER_DIALECT == ASM_ATT)
15732 putc ('.', file);
15733 #endif
15735 case 'C':
15736 case 'c':
15737 if (!COMPARISON_P (x))
15739 output_operand_lossage ("operand is not a condition code, "
15740 "invalid operand code '%c'", code);
15741 return;
15743 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15744 code == 'c' || code == 'f',
15745 code == 'F' || code == 'f',
15746 file);
15747 return;
15749 case 'H':
15750 if (!offsettable_memref_p (x))
15752 output_operand_lossage ("operand is not an offsettable memory "
15753 "reference, invalid operand code 'H'");
15754 return;
15756 /* It doesn't actually matter what mode we use here, as we're
15757 only going to use this for printing. */
15758 x = adjust_address_nv (x, DImode, 8);
15759 /* Output 'qword ptr' for the Intel assembler dialect. */
15760 if (ASSEMBLER_DIALECT == ASM_INTEL)
15761 code = 'q';
15762 break;
15764 case 'K':
15765 gcc_assert (CONST_INT_P (x));
15767 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15768 #ifdef HAVE_AS_IX86_HLE
15769 fputs ("xacquire ", file);
15770 #else
15771 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15772 #endif
15773 else if (INTVAL (x) & IX86_HLE_RELEASE)
15774 #ifdef HAVE_AS_IX86_HLE
15775 fputs ("xrelease ", file);
15776 #else
15777 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15778 #endif
15779 /* We do not want to print the value of the operand. */
15780 return;
15782 case 'N':
15783 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15784 fputs ("{z}", file);
15785 return;
15787 case 'r':
15788 gcc_assert (CONST_INT_P (x));
15789 gcc_assert (INTVAL (x) == ROUND_SAE);
15791 if (ASSEMBLER_DIALECT == ASM_INTEL)
15792 fputs (", ", file);
15794 fputs ("{sae}", file);
15796 if (ASSEMBLER_DIALECT == ASM_ATT)
15797 fputs (", ", file);
15799 return;
15801 case 'R':
15802 gcc_assert (CONST_INT_P (x));
15804 if (ASSEMBLER_DIALECT == ASM_INTEL)
15805 fputs (", ", file);
15807 switch (INTVAL (x))
15809 case ROUND_NEAREST_INT | ROUND_SAE:
15810 fputs ("{rn-sae}", file);
15811 break;
15812 case ROUND_NEG_INF | ROUND_SAE:
15813 fputs ("{rd-sae}", file);
15814 break;
15815 case ROUND_POS_INF | ROUND_SAE:
15816 fputs ("{ru-sae}", file);
15817 break;
15818 case ROUND_ZERO | ROUND_SAE:
15819 fputs ("{rz-sae}", file);
15820 break;
15821 default:
15822 gcc_unreachable ();
15825 if (ASSEMBLER_DIALECT == ASM_ATT)
15826 fputs (", ", file);
15828 return;
15830 case '*':
15831 if (ASSEMBLER_DIALECT == ASM_ATT)
15832 putc ('*', file);
15833 return;
15835 case '&':
15837 const char *name = get_some_local_dynamic_name ();
15838 if (name == NULL)
15839 output_operand_lossage ("'%%&' used without any "
15840 "local dynamic TLS references");
15841 else
15842 assemble_name (file, name);
15843 return;
15846 case '+':
15848 rtx x;
15850 if (!optimize
15851 || optimize_function_for_size_p (cfun)
15852 || !TARGET_BRANCH_PREDICTION_HINTS)
15853 return;
15855 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15856 if (x)
15858 int pred_val = XINT (x, 0);
15860 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15861 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15863 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15864 bool cputaken
15865 = final_forward_branch_p (current_output_insn) == 0;
15867 /* Emit hints only when the default branch prediction
15868 heuristics would fail. */
15869 if (taken != cputaken)
15871 /* We use 3e (DS) prefix for taken branches and
15872 2e (CS) prefix for not taken branches. */
15873 if (taken)
15874 fputs ("ds ; ", file);
15875 else
15876 fputs ("cs ; ", file);
15880 return;
15883 case ';':
15884 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15885 putc (';', file);
15886 #endif
15887 return;
15889 case '@':
15890 if (ASSEMBLER_DIALECT == ASM_ATT)
15891 putc ('%', file);
15893 /* The kernel uses a different segment register for performance
15894 reasons; a system call would not have to trash the userspace
15895 segment register, which would be expensive. */
15896 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15897 fputs ("fs", file);
15898 else
15899 fputs ("gs", file);
15900 return;
15902 case '~':
15903 putc (TARGET_AVX2 ? 'i' : 'f', file);
15904 return;
15906 case '^':
15907 if (TARGET_64BIT && Pmode != word_mode)
15908 fputs ("addr32 ", file);
15909 return;
15911 case '!':
15912 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15913 fputs ("bnd ", file);
15914 return;
15916 default:
15917 output_operand_lossage ("invalid operand code '%c'", code);
15921 if (REG_P (x))
15922 print_reg (x, code, file);
15924 else if (MEM_P (x))
15926 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15927 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15928 && GET_MODE (x) != BLKmode)
15930 const char * size;
15931 switch (GET_MODE_SIZE (GET_MODE (x)))
15933 case 1: size = "BYTE"; break;
15934 case 2: size = "WORD"; break;
15935 case 4: size = "DWORD"; break;
15936 case 8: size = "QWORD"; break;
15937 case 12: size = "TBYTE"; break;
15938 case 16:
15939 if (GET_MODE (x) == XFmode)
15940 size = "TBYTE";
15941 else
15942 size = "XMMWORD";
15943 break;
15944 case 32: size = "YMMWORD"; break;
15945 case 64: size = "ZMMWORD"; break;
15946 default:
15947 gcc_unreachable ();
15950 /* Check for explicit size override (codes 'b', 'w', 'k',
15951 'q' and 'x') */
15952 if (code == 'b')
15953 size = "BYTE";
15954 else if (code == 'w')
15955 size = "WORD";
15956 else if (code == 'k')
15957 size = "DWORD";
15958 else if (code == 'q')
15959 size = "QWORD";
15960 else if (code == 'x')
15961 size = "XMMWORD";
15963 fputs (size, file);
15964 fputs (" PTR ", file);
15967 x = XEXP (x, 0);
15968 /* Avoid (%rip) for call operands. */
15969 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15970 && !CONST_INT_P (x))
15971 output_addr_const (file, x);
15972 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15973 output_operand_lossage ("invalid constraints for operand");
15974 else
15975 output_address (x);
15978 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15980 REAL_VALUE_TYPE r;
15981 long l;
15983 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15984 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15986 if (ASSEMBLER_DIALECT == ASM_ATT)
15987 putc ('$', file);
15988 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15989 if (code == 'q')
15990 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15991 (unsigned long long) (int) l);
15992 else
15993 fprintf (file, "0x%08x", (unsigned int) l);
15996 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15998 REAL_VALUE_TYPE r;
15999 long l[2];
16001 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16002 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16004 if (ASSEMBLER_DIALECT == ASM_ATT)
16005 putc ('$', file);
16006 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16009 /* These float cases don't actually occur as immediate operands. */
16010 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16012 char dstr[30];
16014 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16015 fputs (dstr, file);
16018 else
16020 /* We have patterns that allow zero sets of memory, for instance.
16021 In 64-bit mode, we should probably support all 8-byte vectors,
16022 since we can in fact encode that into an immediate. */
16023 if (GET_CODE (x) == CONST_VECTOR)
16025 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16026 x = const0_rtx;
16029 if (code != 'P' && code != 'p')
16031 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16033 if (ASSEMBLER_DIALECT == ASM_ATT)
16034 putc ('$', file);
16036 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16037 || GET_CODE (x) == LABEL_REF)
16039 if (ASSEMBLER_DIALECT == ASM_ATT)
16040 putc ('$', file);
16041 else
16042 fputs ("OFFSET FLAT:", file);
16045 if (CONST_INT_P (x))
16046 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16047 else if (flag_pic || MACHOPIC_INDIRECT)
16048 output_pic_addr_const (file, x, code);
16049 else
16050 output_addr_const (file, x);
16054 static bool
16055 ix86_print_operand_punct_valid_p (unsigned char code)
16057 return (code == '@' || code == '*' || code == '+' || code == '&'
16058 || code == ';' || code == '~' || code == '^' || code == '!');
16061 /* Print a memory operand whose address is ADDR. */
16063 static void
16064 ix86_print_operand_address (FILE *file, rtx addr)
16066 struct ix86_address parts;
16067 rtx base, index, disp;
16068 int scale;
16069 int ok;
16070 bool vsib = false;
16071 int code = 0;
16073 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16075 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16076 gcc_assert (parts.index == NULL_RTX);
16077 parts.index = XVECEXP (addr, 0, 1);
16078 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16079 addr = XVECEXP (addr, 0, 0);
16080 vsib = true;
16082 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16084 gcc_assert (TARGET_64BIT);
16085 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16086 code = 'q';
16088 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16090 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16091 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16092 if (parts.base != NULL_RTX)
16094 parts.index = parts.base;
16095 parts.scale = 1;
16097 parts.base = XVECEXP (addr, 0, 0);
16098 addr = XVECEXP (addr, 0, 0);
16100 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16102 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16103 gcc_assert (parts.index == NULL_RTX);
16104 parts.index = XVECEXP (addr, 0, 1);
16105 addr = XVECEXP (addr, 0, 0);
16107 else
16108 ok = ix86_decompose_address (addr, &parts);
16110 gcc_assert (ok);
16112 base = parts.base;
16113 index = parts.index;
16114 disp = parts.disp;
16115 scale = parts.scale;
16117 switch (parts.seg)
16119 case SEG_DEFAULT:
16120 break;
16121 case SEG_FS:
16122 case SEG_GS:
16123 if (ASSEMBLER_DIALECT == ASM_ATT)
16124 putc ('%', file);
16125 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16126 break;
16127 default:
16128 gcc_unreachable ();
16131 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16132 if (TARGET_64BIT && !base && !index)
16134 rtx symbol = disp;
16136 if (GET_CODE (disp) == CONST
16137 && GET_CODE (XEXP (disp, 0)) == PLUS
16138 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16139 symbol = XEXP (XEXP (disp, 0), 0);
16141 if (GET_CODE (symbol) == LABEL_REF
16142 || (GET_CODE (symbol) == SYMBOL_REF
16143 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16144 base = pc_rtx;
16146 if (!base && !index)
16148 /* A displacement-only address requires special attention. */
16150 if (CONST_INT_P (disp))
16152 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16153 fputs ("ds:", file);
16154 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16156 else if (flag_pic)
16157 output_pic_addr_const (file, disp, 0);
16158 else
16159 output_addr_const (file, disp);
16161 else
16163 /* Print SImode register names to force addr32 prefix. */
16164 if (SImode_address_operand (addr, VOIDmode))
16166 #ifdef ENABLE_CHECKING
16167 gcc_assert (TARGET_64BIT);
16168 switch (GET_CODE (addr))
16170 case SUBREG:
16171 gcc_assert (GET_MODE (addr) == SImode);
16172 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16173 break;
16174 case ZERO_EXTEND:
16175 case AND:
16176 gcc_assert (GET_MODE (addr) == DImode);
16177 break;
16178 default:
16179 gcc_unreachable ();
16181 #endif
16182 gcc_assert (!code);
16183 code = 'k';
16185 else if (code == 0
16186 && TARGET_X32
16187 && disp
16188 && CONST_INT_P (disp)
16189 && INTVAL (disp) < -16*1024*1024)
16191 /* X32 runs in 64-bit mode, where displacement, DISP, in
16192 address DISP(%r64), is encoded as 32-bit immediate sign-
16193 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16194 address is %r64 + 0xffffffffbffffd00. When %r64 <
16195 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16196 which is invalid for x32. The correct address is %r64
16197 - 0x40000300 == 0xf7ffdd64. To properly encode
16198 -0x40000300(%r64) for x32, we zero-extend negative
16199 displacement by forcing addr32 prefix which truncates
16200 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16201 zero-extend all negative displacements, including -1(%rsp).
16202 However, for small negative displacements, sign-extension
16203 won't cause overflow. We only zero-extend negative
16204 displacements if they are < -16*1024*1024, which is also used
16205 to check legitimate address displacements for PIC. */
16206 code = 'k';
16209 if (ASSEMBLER_DIALECT == ASM_ATT)
16211 if (disp)
16213 if (flag_pic)
16214 output_pic_addr_const (file, disp, 0);
16215 else if (GET_CODE (disp) == LABEL_REF)
16216 output_asm_label (disp);
16217 else
16218 output_addr_const (file, disp);
16221 putc ('(', file);
16222 if (base)
16223 print_reg (base, code, file);
16224 if (index)
16226 putc (',', file);
16227 print_reg (index, vsib ? 0 : code, file);
16228 if (scale != 1 || vsib)
16229 fprintf (file, ",%d", scale);
16231 putc (')', file);
16233 else
16235 rtx offset = NULL_RTX;
16237 if (disp)
16239 /* Pull out the offset of a symbol; print any symbol itself. */
16240 if (GET_CODE (disp) == CONST
16241 && GET_CODE (XEXP (disp, 0)) == PLUS
16242 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16244 offset = XEXP (XEXP (disp, 0), 1);
16245 disp = gen_rtx_CONST (VOIDmode,
16246 XEXP (XEXP (disp, 0), 0));
16249 if (flag_pic)
16250 output_pic_addr_const (file, disp, 0);
16251 else if (GET_CODE (disp) == LABEL_REF)
16252 output_asm_label (disp);
16253 else if (CONST_INT_P (disp))
16254 offset = disp;
16255 else
16256 output_addr_const (file, disp);
16259 putc ('[', file);
16260 if (base)
16262 print_reg (base, code, file);
16263 if (offset)
16265 if (INTVAL (offset) >= 0)
16266 putc ('+', file);
16267 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16270 else if (offset)
16271 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16272 else
16273 putc ('0', file);
16275 if (index)
16277 putc ('+', file);
16278 print_reg (index, vsib ? 0 : code, file);
16279 if (scale != 1 || vsib)
16280 fprintf (file, "*%d", scale);
16282 putc (']', file);
16287 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16289 static bool
16290 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16292 rtx op;
16294 if (GET_CODE (x) != UNSPEC)
16295 return false;
16297 op = XVECEXP (x, 0, 0);
16298 switch (XINT (x, 1))
16300 case UNSPEC_GOTTPOFF:
16301 output_addr_const (file, op);
16302 /* FIXME: This might be @TPOFF in Sun ld. */
16303 fputs ("@gottpoff", file);
16304 break;
16305 case UNSPEC_TPOFF:
16306 output_addr_const (file, op);
16307 fputs ("@tpoff", file);
16308 break;
16309 case UNSPEC_NTPOFF:
16310 output_addr_const (file, op);
16311 if (TARGET_64BIT)
16312 fputs ("@tpoff", file);
16313 else
16314 fputs ("@ntpoff", file);
16315 break;
16316 case UNSPEC_DTPOFF:
16317 output_addr_const (file, op);
16318 fputs ("@dtpoff", file);
16319 break;
16320 case UNSPEC_GOTNTPOFF:
16321 output_addr_const (file, op);
16322 if (TARGET_64BIT)
16323 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16324 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16325 else
16326 fputs ("@gotntpoff", file);
16327 break;
16328 case UNSPEC_INDNTPOFF:
16329 output_addr_const (file, op);
16330 fputs ("@indntpoff", file);
16331 break;
16332 #if TARGET_MACHO
16333 case UNSPEC_MACHOPIC_OFFSET:
16334 output_addr_const (file, op);
16335 putc ('-', file);
16336 machopic_output_function_base_name (file);
16337 break;
16338 #endif
16340 case UNSPEC_STACK_CHECK:
16342 int offset;
16344 gcc_assert (flag_split_stack);
16346 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16347 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16348 #else
16349 gcc_unreachable ();
16350 #endif
16352 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16354 break;
16356 default:
16357 return false;
16360 return true;
16363 /* Split one or more double-mode RTL references into pairs of half-mode
16364 references. The RTL can be REG, offsettable MEM, integer constant, or
16365 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16366 split and "num" is its length. lo_half and hi_half are output arrays
16367 that parallel "operands". */
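/* For example, a DImode MEM is split into two SImode MEMs at offsets 0 and
   4, while a TImode operand yields two DImode halves at offsets 0 and 8.  */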
16369 void
16370 split_double_mode (machine_mode mode, rtx operands[],
16371 int num, rtx lo_half[], rtx hi_half[])
16373 machine_mode half_mode;
16374 unsigned int byte;
16376 switch (mode)
16378 case TImode:
16379 half_mode = DImode;
16380 break;
16381 case DImode:
16382 half_mode = SImode;
16383 break;
16384 default:
16385 gcc_unreachable ();
16388 byte = GET_MODE_SIZE (half_mode);
16390 while (num--)
16392 rtx op = operands[num];
16394 /* simplify_subreg refuses to split volatile memory addresses,
16395 but we still have to handle them. */
16396 if (MEM_P (op))
16398 lo_half[num] = adjust_address (op, half_mode, 0);
16399 hi_half[num] = adjust_address (op, half_mode, byte);
16401 else
16403 lo_half[num] = simplify_gen_subreg (half_mode, op,
16404 GET_MODE (op) == VOIDmode
16405 ? mode : GET_MODE (op), 0);
16406 hi_half[num] = simplify_gen_subreg (half_mode, op,
16407 GET_MODE (op) == VOIDmode
16408 ? mode : GET_MODE (op), byte);
16413 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16414 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16415 is the expression of the binary operation. The output may either be
16416 emitted here, or returned to the caller, like all output_* functions.
16418 There is no guarantee that the operands are the same mode, as they
16419 might be within FLOAT or FLOAT_EXTEND expressions. */
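/* As a rough example, an SFmode add with SSE operands comes out as
   "addss\t{%2, %0|%0, %2}", or as the three-operand "vaddss" form when
   AVX is enabled.  */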
16421 #ifndef SYSV386_COMPAT
16422 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16423 wants to fix the assemblers because that causes incompatibility
16424 with gcc. No-one wants to fix gcc because that causes
16425 incompatibility with assemblers... You can use the option of
16426 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16427 #define SYSV386_COMPAT 1
16428 #endif
16430 const char *
16431 output_387_binary_op (rtx insn, rtx *operands)
16433 static char buf[40];
16434 const char *p;
16435 const char *ssep;
16436 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16438 #ifdef ENABLE_CHECKING
16439 /* Even if we do not want to check the inputs, this documents the input
16440 constraints, which helps in understanding the following code. */
16441 if (STACK_REG_P (operands[0])
16442 && ((REG_P (operands[1])
16443 && REGNO (operands[0]) == REGNO (operands[1])
16444 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16445 || (REG_P (operands[2])
16446 && REGNO (operands[0]) == REGNO (operands[2])
16447 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16448 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16449 ; /* ok */
16450 else
16451 gcc_assert (is_sse);
16452 #endif
16454 switch (GET_CODE (operands[3]))
16456 case PLUS:
16457 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16458 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16459 p = "fiadd";
16460 else
16461 p = "fadd";
16462 ssep = "vadd";
16463 break;
16465 case MINUS:
16466 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16467 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16468 p = "fisub";
16469 else
16470 p = "fsub";
16471 ssep = "vsub";
16472 break;
16474 case MULT:
16475 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16476 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16477 p = "fimul";
16478 else
16479 p = "fmul";
16480 ssep = "vmul";
16481 break;
16483 case DIV:
16484 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16485 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16486 p = "fidiv";
16487 else
16488 p = "fdiv";
16489 ssep = "vdiv";
16490 break;
16492 default:
16493 gcc_unreachable ();
16496 if (is_sse)
16498 if (TARGET_AVX)
16500 strcpy (buf, ssep);
16501 if (GET_MODE (operands[0]) == SFmode)
16502 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16503 else
16504 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16506 else
16508 strcpy (buf, ssep + 1);
16509 if (GET_MODE (operands[0]) == SFmode)
16510 strcat (buf, "ss\t{%2, %0|%0, %2}");
16511 else
16512 strcat (buf, "sd\t{%2, %0|%0, %2}");
16514 return buf;
16516 strcpy (buf, p);
16518 switch (GET_CODE (operands[3]))
16520 case MULT:
16521 case PLUS:
16522 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16523 std::swap (operands[1], operands[2]);
16525 /* We know operands[0] == operands[1] at this point. */
16527 if (MEM_P (operands[2]))
16529 p = "%Z2\t%2";
16530 break;
16533 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16535 if (STACK_TOP_P (operands[0]))
16536 /* How is it that we are storing to a dead operand[2]?
16537 Well, presumably operands[1] is dead too. We can't
16538 store the result to st(0) as st(0) gets popped on this
16539 instruction. Instead store to operands[2] (which I
16540 think has to be st(1)). st(1) will be popped later.
16541 gcc <= 2.8.1 didn't have this check and generated
16542 assembly code that the Unixware assembler rejected. */
16543 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16544 else
16545 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16546 break;
16549 if (STACK_TOP_P (operands[0]))
16550 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16551 else
16552 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16553 break;
16555 case MINUS:
16556 case DIV:
16557 if (MEM_P (operands[1]))
16559 p = "r%Z1\t%1";
16560 break;
16563 if (MEM_P (operands[2]))
16565 p = "%Z2\t%2";
16566 break;
16569 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16571 #if SYSV386_COMPAT
16572 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16573 derived assemblers, confusingly reverse the direction of
16574 the operation for fsub{r} and fdiv{r} when the
16575 destination register is not st(0). The Intel assembler
16576 doesn't have this brain damage. Read !SYSV386_COMPAT to
16577 figure out what the hardware really does. */
16578 if (STACK_TOP_P (operands[0]))
16579 p = "{p\t%0, %2|rp\t%2, %0}";
16580 else
16581 p = "{rp\t%2, %0|p\t%0, %2}";
16582 #else
16583 if (STACK_TOP_P (operands[0]))
16584 /* As above for fmul/fadd, we can't store to st(0). */
16585 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16586 else
16587 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16588 #endif
16589 break;
16592 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16594 #if SYSV386_COMPAT
16595 if (STACK_TOP_P (operands[0]))
16596 p = "{rp\t%0, %1|p\t%1, %0}";
16597 else
16598 p = "{p\t%1, %0|rp\t%0, %1}";
16599 #else
16600 if (STACK_TOP_P (operands[0]))
16601 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16602 else
16603 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16604 #endif
16605 break;
16608 if (STACK_TOP_P (operands[0]))
16610 if (STACK_TOP_P (operands[1]))
16611 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16612 else
16613 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16614 break;
16616 else if (STACK_TOP_P (operands[1]))
16618 #if SYSV386_COMPAT
16619 p = "{\t%1, %0|r\t%0, %1}";
16620 #else
16621 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16622 #endif
16624 else
16626 #if SYSV386_COMPAT
16627 p = "{r\t%2, %0|\t%0, %2}";
16628 #else
16629 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16630 #endif
16632 break;
16634 default:
16635 gcc_unreachable ();
16638 strcat (buf, p);
16639 return buf;
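/* A note on the templates above (illustrative, not exhaustive): a string
   such as "fadd\t{%2, %0|%0, %2}" carries both assembler dialects, and the
   asm output machinery keeps only the part selected by ASSEMBLER_DIALECT,
   so AT&T output becomes "fadd %2, %0" and Intel output "fadd %0, %2"
   before the operand numbers are substituted.  */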
16642 /* Check if a 256bit AVX register is referenced inside EXP. */
16644 static bool
16645 ix86_check_avx256_register (const_rtx exp)
16647 if (GET_CODE (exp) == SUBREG)
16648 exp = SUBREG_REG (exp);
16650 return (REG_P (exp)
16651 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
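/* For illustration (hypothetical RTL, not from a real dump):
   ix86_check_avx256_register returns true for (reg:V8SF 90) and for
   (subreg:SI (reg:V4DF 91) 0), but false for (reg:V4SF 92), because only
   256bit vector modes (or OImode) satisfy VALID_AVX256_REG_OR_OI_MODE.  */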
16654 /* Return the mode needed for the entity in the optimize_mode_switching pass. */
16656 static int
16657 ix86_avx_u128_mode_needed (rtx_insn *insn)
16659 if (CALL_P (insn))
16661 rtx link;
16663 /* Needed mode is set to AVX_U128_CLEAN if there are
16664 no 256bit modes used in function arguments. */
16665 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16666 link;
16667 link = XEXP (link, 1))
16669 if (GET_CODE (XEXP (link, 0)) == USE)
16671 rtx arg = XEXP (XEXP (link, 0), 0);
16673 if (ix86_check_avx256_register (arg))
16674 return AVX_U128_DIRTY;
16678 return AVX_U128_CLEAN;
16681 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16682 changes state only when a 256bit register is written to, but we need
16683 to prevent the compiler from moving the optimal insertion point above
16684 an eventual read from a 256bit register. */
16685 subrtx_iterator::array_type array;
16686 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16687 if (ix86_check_avx256_register (*iter))
16688 return AVX_U128_DIRTY;
16690 return AVX_U128_ANY;
16693 /* Return the mode that the i387 must be switched into
16694 prior to the execution of INSN. */
16696 static int
16697 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16699 enum attr_i387_cw mode;
16701 /* The mode UNINITIALIZED is used to store the control word after a
16702 function call or ASM pattern. The mode ANY specifies that the function
16703 has no requirements on the control word and makes no changes in the
16704 bits we are interested in. */
16706 if (CALL_P (insn)
16707 || (NONJUMP_INSN_P (insn)
16708 && (asm_noperands (PATTERN (insn)) >= 0
16709 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16710 return I387_CW_UNINITIALIZED;
16712 if (recog_memoized (insn) < 0)
16713 return I387_CW_ANY;
16715 mode = get_attr_i387_cw (insn);
16717 switch (entity)
16719 case I387_TRUNC:
16720 if (mode == I387_CW_TRUNC)
16721 return mode;
16722 break;
16724 case I387_FLOOR:
16725 if (mode == I387_CW_FLOOR)
16726 return mode;
16727 break;
16729 case I387_CEIL:
16730 if (mode == I387_CW_CEIL)
16731 return mode;
16732 break;
16734 case I387_MASK_PM:
16735 if (mode == I387_CW_MASK_PM)
16736 return mode;
16737 break;
16739 default:
16740 gcc_unreachable ();
16743 return I387_CW_ANY;
16746 /* Return the mode that ENTITY must be switched into
16747 prior to the execution of INSN. */
16749 static int
16750 ix86_mode_needed (int entity, rtx_insn *insn)
16752 switch (entity)
16754 case AVX_U128:
16755 return ix86_avx_u128_mode_needed (insn);
16756 case I387_TRUNC:
16757 case I387_FLOOR:
16758 case I387_CEIL:
16759 case I387_MASK_PM:
16760 return ix86_i387_mode_needed (entity, insn);
16761 default:
16762 gcc_unreachable ();
16764 return 0;
16767 /* Check if a 256bit AVX register is referenced in stores. */
16769 static void
16770 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16772 if (ix86_check_avx256_register (dest))
16774 bool *used = (bool *) data;
16775 *used = true;
16779 /* Calculate mode of upper 128bit AVX registers after the insn. */
16781 static int
16782 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16784 rtx pat = PATTERN (insn);
16786 if (vzeroupper_operation (pat, VOIDmode)
16787 || vzeroall_operation (pat, VOIDmode))
16788 return AVX_U128_CLEAN;
16790 /* We know that the state is clean after a CALL insn if no
16791 256bit register is used for the function return value. */
16792 if (CALL_P (insn))
16794 bool avx_reg256_found = false;
16795 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16797 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16800 /* Otherwise, return the current mode. Remember that if the insn
16801 references AVX 256bit registers, the mode was already changed
16802 to DIRTY from MODE_NEEDED. */
16803 return mode;
16806 /* Return the mode that an insn results in. */
16808 static int
16809 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16811 switch (entity)
16813 case AVX_U128:
16814 return ix86_avx_u128_mode_after (mode, insn);
16815 case I387_TRUNC:
16816 case I387_FLOOR:
16817 case I387_CEIL:
16818 case I387_MASK_PM:
16819 return mode;
16820 default:
16821 gcc_unreachable ();
16825 static int
16826 ix86_avx_u128_mode_entry (void)
16828 tree arg;
16830 /* Entry mode is set to AVX_U128_DIRTY if there are
16831 256bit modes used in function arguments. */
16832 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16833 arg = TREE_CHAIN (arg))
16835 rtx incoming = DECL_INCOMING_RTL (arg);
16837 if (incoming && ix86_check_avx256_register (incoming))
16838 return AVX_U128_DIRTY;
16841 return AVX_U128_CLEAN;
16844 /* Return a mode that ENTITY is assumed to be
16845 switched to at function entry. */
16847 static int
16848 ix86_mode_entry (int entity)
16850 switch (entity)
16852 case AVX_U128:
16853 return ix86_avx_u128_mode_entry ();
16854 case I387_TRUNC:
16855 case I387_FLOOR:
16856 case I387_CEIL:
16857 case I387_MASK_PM:
16858 return I387_CW_ANY;
16859 default:
16860 gcc_unreachable ();
16864 static int
16865 ix86_avx_u128_mode_exit (void)
16867 rtx reg = crtl->return_rtx;
16869 /* Exit mode is set to AVX_U128_DIRTY if there are
16870 256bit modes used in the function return register. */
16871 if (reg && ix86_check_avx256_register (reg))
16872 return AVX_U128_DIRTY;
16874 return AVX_U128_CLEAN;
16877 /* Return a mode that ENTITY is assumed to be
16878 switched to at function exit. */
16880 static int
16881 ix86_mode_exit (int entity)
16883 switch (entity)
16885 case AVX_U128:
16886 return ix86_avx_u128_mode_exit ();
16887 case I387_TRUNC:
16888 case I387_FLOOR:
16889 case I387_CEIL:
16890 case I387_MASK_PM:
16891 return I387_CW_ANY;
16892 default:
16893 gcc_unreachable ();
16897 static int
16898 ix86_mode_priority (int, int n)
16900 return n;
16903 /* Output code to initialize control word copies used by trunc?f?i and
16904 rounding patterns. MODE is the I387_CW_* value selecting which control
16905 word variant to set up; the adjusted copy is kept in a stack slot. */
16907 static void
16908 emit_i387_cw_initialization (int mode)
16910 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16911 rtx new_mode;
16913 enum ix86_stack_slot slot;
16915 rtx reg = gen_reg_rtx (HImode);
16917 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16918 emit_move_insn (reg, copy_rtx (stored_mode));
16920 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16921 || optimize_insn_for_size_p ())
16923 switch (mode)
16925 case I387_CW_TRUNC:
16926 /* round toward zero (truncate) */
16927 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16928 slot = SLOT_CW_TRUNC;
16929 break;
16931 case I387_CW_FLOOR:
16932 /* round down toward -oo */
16933 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16934 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16935 slot = SLOT_CW_FLOOR;
16936 break;
16938 case I387_CW_CEIL:
16939 /* round up toward +oo */
16940 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16941 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16942 slot = SLOT_CW_CEIL;
16943 break;
16945 case I387_CW_MASK_PM:
16946 /* mask precision exception for nearbyint() */
16947 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16948 slot = SLOT_CW_MASK_PM;
16949 break;
16951 default:
16952 gcc_unreachable ();
16955 else
16957 switch (mode)
16959 case I387_CW_TRUNC:
16960 /* round toward zero (truncate) */
16961 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16962 slot = SLOT_CW_TRUNC;
16963 break;
16965 case I387_CW_FLOOR:
16966 /* round down toward -oo */
16967 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16968 slot = SLOT_CW_FLOOR;
16969 break;
16971 case I387_CW_CEIL:
16972 /* round up toward +oo */
16973 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16974 slot = SLOT_CW_CEIL;
16975 break;
16977 case I387_CW_MASK_PM:
16978 /* mask precision exception for nearbyint() */
16979 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16980 slot = SLOT_CW_MASK_PM;
16981 break;
16983 default:
16984 gcc_unreachable ();
16988 gcc_assert (slot < MAX_386_STACK_LOCALS);
16990 new_mode = assign_386_stack_local (HImode, slot);
16991 emit_move_insn (new_mode, reg);
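/* For reference (abbreviated x87 control word layout): bits 10-11 form the
   rounding-control field (00 nearest, 01 down, 10 up, 11 toward zero),
   which is why the masks used above are 0x0400, 0x0800 and 0x0c00, and
   bit 5 (0x0020) is the precision-exception mask needed for nearbyint().  */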
16994 /* Emit vzeroupper. */
16996 void
16997 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16999 int i;
17001 /* Cancel automatic vzeroupper insertion if there are
17002 live call-saved SSE registers at the insertion point. */
17004 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17005 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17006 return;
17008 if (TARGET_64BIT)
17009 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17010 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17011 return;
17013 emit_insn (gen_avx_vzeroupper ());
17018 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
17019 is the set of hard registers live at the point where the insn(s)
17020 are to be inserted. */
17022 static void
17023 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17024 HARD_REG_SET regs_live)
17026 switch (entity)
17028 case AVX_U128:
17029 if (mode == AVX_U128_CLEAN)
17030 ix86_avx_emit_vzeroupper (regs_live);
17031 break;
17032 case I387_TRUNC:
17033 case I387_FLOOR:
17034 case I387_CEIL:
17035 case I387_MASK_PM:
17036 if (mode != I387_CW_ANY
17037 && mode != I387_CW_UNINITIALIZED)
17038 emit_i387_cw_initialization (mode);
17039 break;
17040 default:
17041 gcc_unreachable ();
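/* A hedged sketch of how the AVX_U128 hooks cooperate (hypothetical
   source, not a real dump):

       v = _mm256_add_ps (a, b);   // insn uses 256bit regs
                                   //   -> mode_needed = AVX_U128_DIRTY
       bar (n);                    // call with no 256bit args or return
                                   //   -> mode_needed = AVX_U128_CLEAN

   The mode switching pass sees the CLEAN requirement following the DIRTY
   one, and ix86_emit_mode_set then inserts a vzeroupper before the call
   (unless live call-saved SSE registers forbid it).  */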
17045 /* Output code for INSN to convert a float to a signed int. OPERANDS
17046 are the insn operands. The output may be [HSD]Imode and the input
17047 operand may be [SDX]Fmode. */
17049 const char *
17050 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17052 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17053 int dimode_p = GET_MODE (operands[0]) == DImode;
17054 int round_mode = get_attr_i387_cw (insn);
17056 /* Jump through a hoop or two for DImode, since the hardware has no
17057 non-popping instruction. We used to do this a different way, but
17058 that was somewhat fragile and broke with post-reload splitters. */
17059 if ((dimode_p || fisttp) && !stack_top_dies)
17060 output_asm_insn ("fld\t%y1", operands);
17062 gcc_assert (STACK_TOP_P (operands[1]));
17063 gcc_assert (MEM_P (operands[0]));
17064 gcc_assert (GET_MODE (operands[1]) != TFmode);
17066 if (fisttp)
17067 output_asm_insn ("fisttp%Z0\t%0", operands);
17068 else
17070 if (round_mode != I387_CW_ANY)
17071 output_asm_insn ("fldcw\t%3", operands);
17072 if (stack_top_dies || dimode_p)
17073 output_asm_insn ("fistp%Z0\t%0", operands);
17074 else
17075 output_asm_insn ("fist%Z0\t%0", operands);
17076 if (round_mode != I387_CW_ANY)
17077 output_asm_insn ("fldcw\t%2", operands);
17080 return "";
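/* Illustrative output (hypothetical stack slots, AT&T syntax) for a DImode
   truncation when the stack top must be preserved and no fisttp is
   available:

       fld     %st(0)       # copy, because the DImode store always pops
       fldcw   NEW_CW       # %3: switch to the truncating control word
       fistp   ...          # fistp%Z0 supplies the operand-size suffix
       fldcw   OLD_CW       # %2: restore the original control word  */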
17083 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17084 have the values zero or one, indicates the ffreep insn's operand
17085 from the OPERANDS array. */
17087 static const char *
17088 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17090 if (TARGET_USE_FFREEP)
17091 #ifdef HAVE_AS_IX86_FFREEP
17092 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17093 #else
17095 static char retval[32];
17096 int regno = REGNO (operands[opno]);
17098 gcc_assert (STACK_REGNO_P (regno));
17100 regno -= FIRST_STACK_REG;
17102 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17103 return retval;
17105 #endif
17107 return opno ? "fstp\t%y1" : "fstp\t%y0";
17111 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17112 should be used. UNORDERED_P is true when fucom should be used. */
17114 const char *
17115 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17117 int stack_top_dies;
17118 rtx cmp_op0, cmp_op1;
17119 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17121 if (eflags_p)
17123 cmp_op0 = operands[0];
17124 cmp_op1 = operands[1];
17126 else
17128 cmp_op0 = operands[1];
17129 cmp_op1 = operands[2];
17132 if (is_sse)
17134 if (GET_MODE (operands[0]) == SFmode)
17135 if (unordered_p)
17136 return "%vucomiss\t{%1, %0|%0, %1}";
17137 else
17138 return "%vcomiss\t{%1, %0|%0, %1}";
17139 else
17140 if (unordered_p)
17141 return "%vucomisd\t{%1, %0|%0, %1}";
17142 else
17143 return "%vcomisd\t{%1, %0|%0, %1}";
17146 gcc_assert (STACK_TOP_P (cmp_op0));
17148 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17150 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17152 if (stack_top_dies)
17154 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17155 return output_387_ffreep (operands, 1);
17157 else
17158 return "ftst\n\tfnstsw\t%0";
17161 if (STACK_REG_P (cmp_op1)
17162 && stack_top_dies
17163 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17164 && REGNO (cmp_op1) != FIRST_STACK_REG)
17166 /* If both the top of the 387 stack and the other operand (also a
17167 stack register) die, then this must be a
17168 `fcompp' float compare. */
17170 if (eflags_p)
17172 /* There is no double popping fcomi variant. Fortunately,
17173 eflags is immune from the fstp's cc clobbering. */
17174 if (unordered_p)
17175 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17176 else
17177 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17178 return output_387_ffreep (operands, 0);
17180 else
17182 if (unordered_p)
17183 return "fucompp\n\tfnstsw\t%0";
17184 else
17185 return "fcompp\n\tfnstsw\t%0";
17188 else
17190 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17192 static const char * const alt[16] =
17194 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17195 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17196 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17197 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17199 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17200 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17201 NULL,
17202 NULL,
17204 "fcomi\t{%y1, %0|%0, %y1}",
17205 "fcomip\t{%y1, %0|%0, %y1}",
17206 "fucomi\t{%y1, %0|%0, %y1}",
17207 "fucomip\t{%y1, %0|%0, %y1}",
17209 NULL,
17210 NULL,
17211 NULL,
17212 NULL
17215 int mask;
17216 const char *ret;
17218 mask = eflags_p << 3;
17219 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17220 mask |= unordered_p << 1;
17221 mask |= stack_top_dies;
17223 gcc_assert (mask < 16);
17224 ret = alt[mask];
17225 gcc_assert (ret);
17227 return ret;
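/* Worked example of the encoding above: eflags_p = 1, integer operand = 0,
   unordered_p = 0, stack_top_dies = 1 gives mask = 0b1001 = 9, which
   selects "fcomip\t{%y1, %0|%0, %y1}" from the table.  */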
17231 void
17232 ix86_output_addr_vec_elt (FILE *file, int value)
17234 const char *directive = ASM_LONG;
17236 #ifdef ASM_QUAD
17237 if (TARGET_LP64)
17238 directive = ASM_QUAD;
17239 #else
17240 gcc_assert (!TARGET_64BIT);
17241 #endif
17243 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17246 void
17247 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17249 const char *directive = ASM_LONG;
17251 #ifdef ASM_QUAD
17252 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17253 directive = ASM_QUAD;
17254 #else
17255 gcc_assert (!TARGET_64BIT);
17256 #endif
17257 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17258 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17259 fprintf (file, "%s%s%d-%s%d\n",
17260 directive, LPREFIX, value, LPREFIX, rel);
17261 else if (HAVE_AS_GOTOFF_IN_DATA)
17262 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17263 #if TARGET_MACHO
17264 else if (TARGET_MACHO)
17266 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17267 machopic_output_function_base_name (file);
17268 putc ('\n', file);
17270 #endif
17271 else
17272 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17273 GOT_SYMBOL_NAME, LPREFIX, value);
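/* Example output (typical ELF target, hypothetical label numbers):
   ix86_output_addr_vec_elt emits        ".long .L5"  (".quad" for LP64),
   while ix86_output_addr_diff_elt emits ".long .L5-.L2" on 64-bit targets
   or ".long .L5@GOTOFF" when the assembler supports @GOTOFF in data.  */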
17276 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17277 for the target. */
17279 void
17280 ix86_expand_clear (rtx dest)
17282 rtx tmp;
17284 /* We play register width games, which are only valid after reload. */
17285 gcc_assert (reload_completed);
17287 /* Avoid HImode and its attendant prefix byte. */
17288 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17289 dest = gen_rtx_REG (SImode, REGNO (dest));
17290 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17292 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17294 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17295 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17298 emit_insn (tmp);
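/* Illustrative expansion (hypothetical hard register): clearing %eax with
   !TARGET_USE_MOV0 (or when optimizing for size) produces

       (parallel [(set (reg:SI ax) (const_int 0))
                  (clobber (reg:CC flags))])

   which is later emitted as "xor %eax, %eax"; otherwise the bare SET is
   kept so that "mov $0, %eax" can be used.  */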
17301 /* X is an unchanging MEM. If it is a constant pool reference, return
17302 the constant pool rtx, else NULL. */
17305 maybe_get_pool_constant (rtx x)
17307 x = ix86_delegitimize_address (XEXP (x, 0));
17309 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17310 return get_pool_constant (x);
17312 return NULL_RTX;
17315 void
17316 ix86_expand_move (machine_mode mode, rtx operands[])
17318 rtx op0, op1;
17319 enum tls_model model;
17321 op0 = operands[0];
17322 op1 = operands[1];
17324 if (GET_CODE (op1) == SYMBOL_REF)
17326 rtx tmp;
17328 model = SYMBOL_REF_TLS_MODEL (op1);
17329 if (model)
17331 op1 = legitimize_tls_address (op1, model, true);
17332 op1 = force_operand (op1, op0);
17333 if (op1 == op0)
17334 return;
17335 op1 = convert_to_mode (mode, op1, 1);
17337 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17338 op1 = tmp;
17340 else if (GET_CODE (op1) == CONST
17341 && GET_CODE (XEXP (op1, 0)) == PLUS
17342 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17344 rtx addend = XEXP (XEXP (op1, 0), 1);
17345 rtx symbol = XEXP (XEXP (op1, 0), 0);
17346 rtx tmp;
17348 model = SYMBOL_REF_TLS_MODEL (symbol);
17349 if (model)
17350 tmp = legitimize_tls_address (symbol, model, true);
17351 else
17352 tmp = legitimize_pe_coff_symbol (symbol, true);
17354 if (tmp)
17356 tmp = force_operand (tmp, NULL);
17357 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17358 op0, 1, OPTAB_DIRECT);
17359 if (tmp == op0)
17360 return;
17361 op1 = convert_to_mode (mode, tmp, 1);
17365 if ((flag_pic || MACHOPIC_INDIRECT)
17366 && symbolic_operand (op1, mode))
17368 if (TARGET_MACHO && !TARGET_64BIT)
17370 #if TARGET_MACHO
17371 /* dynamic-no-pic */
17372 if (MACHOPIC_INDIRECT)
17374 rtx temp = ((reload_in_progress
17375 || ((op0 && REG_P (op0))
17376 && mode == Pmode))
17377 ? op0 : gen_reg_rtx (Pmode));
17378 op1 = machopic_indirect_data_reference (op1, temp);
17379 if (MACHOPIC_PURE)
17380 op1 = machopic_legitimize_pic_address (op1, mode,
17381 temp == op1 ? 0 : temp);
17383 if (op0 != op1 && GET_CODE (op0) != MEM)
17385 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17386 emit_insn (insn);
17387 return;
17389 if (GET_CODE (op0) == MEM)
17390 op1 = force_reg (Pmode, op1);
17391 else
17393 rtx temp = op0;
17394 if (GET_CODE (temp) != REG)
17395 temp = gen_reg_rtx (Pmode);
17396 temp = legitimize_pic_address (op1, temp);
17397 if (temp == op0)
17398 return;
17399 op1 = temp;
17401 /* dynamic-no-pic */
17402 #endif
17404 else
17406 if (MEM_P (op0))
17407 op1 = force_reg (mode, op1);
17408 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17410 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17411 op1 = legitimize_pic_address (op1, reg);
17412 if (op0 == op1)
17413 return;
17414 op1 = convert_to_mode (mode, op1, 1);
17418 else
17420 if (MEM_P (op0)
17421 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17422 || !push_operand (op0, mode))
17423 && MEM_P (op1))
17424 op1 = force_reg (mode, op1);
17426 if (push_operand (op0, mode)
17427 && ! general_no_elim_operand (op1, mode))
17428 op1 = copy_to_mode_reg (mode, op1);
17430 /* Force large constants in 64bit compilation into a register
17431 to get them CSEed. */
17432 if (can_create_pseudo_p ()
17433 && (mode == DImode) && TARGET_64BIT
17434 && immediate_operand (op1, mode)
17435 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17436 && !register_operand (op0, mode)
17437 && optimize)
17438 op1 = copy_to_mode_reg (mode, op1);
17440 if (can_create_pseudo_p ()
17441 && FLOAT_MODE_P (mode)
17442 && GET_CODE (op1) == CONST_DOUBLE)
17444 /* If we are loading a floating point constant into a register,
17445 force the value to memory now, since we'll get better code
17446 out of the back end. */
17448 op1 = validize_mem (force_const_mem (mode, op1));
17449 if (!register_operand (op0, mode))
17451 rtx temp = gen_reg_rtx (mode);
17452 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17453 emit_move_insn (op0, temp);
17454 return;
17459 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17462 void
17463 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17465 rtx op0 = operands[0], op1 = operands[1];
17466 unsigned int align = GET_MODE_ALIGNMENT (mode);
17468 if (push_operand (op0, VOIDmode))
17469 op0 = emit_move_resolve_push (mode, op0);
17471 /* Force constants other than zero into memory. We do not know how
17472 the instructions used to build constants modify the upper 64 bits
17473 of the register; once we have that information we may be able
17474 to handle some of them more efficiently. */
17475 if (can_create_pseudo_p ()
17476 && register_operand (op0, mode)
17477 && (CONSTANT_P (op1)
17478 || (GET_CODE (op1) == SUBREG
17479 && CONSTANT_P (SUBREG_REG (op1))))
17480 && !standard_sse_constant_p (op1))
17481 op1 = validize_mem (force_const_mem (mode, op1));
17483 /* We need to check memory alignment for SSE mode since an attribute
17484 can make operands unaligned. */
17485 if (can_create_pseudo_p ()
17486 && SSE_REG_MODE_P (mode)
17487 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17488 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17490 rtx tmp[2];
17492 /* ix86_expand_vector_move_misalign() does not like constants ... */
17493 if (CONSTANT_P (op1)
17494 || (GET_CODE (op1) == SUBREG
17495 && CONSTANT_P (SUBREG_REG (op1))))
17496 op1 = validize_mem (force_const_mem (mode, op1));
17498 /* ... nor both arguments in memory. */
17499 if (!register_operand (op0, mode)
17500 && !register_operand (op1, mode))
17501 op1 = force_reg (mode, op1);
17503 tmp[0] = op0; tmp[1] = op1;
17504 ix86_expand_vector_move_misalign (mode, tmp);
17505 return;
17508 /* Make operand1 a register if it isn't already. */
17509 if (can_create_pseudo_p ()
17510 && !register_operand (op0, mode)
17511 && !register_operand (op1, mode))
17513 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17514 return;
17517 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17520 /* Split 32-byte AVX unaligned load and store if needed. */
17522 static void
17523 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17525 rtx m;
17526 rtx (*extract) (rtx, rtx, rtx);
17527 rtx (*load_unaligned) (rtx, rtx);
17528 rtx (*store_unaligned) (rtx, rtx);
17529 machine_mode mode;
17531 switch (GET_MODE (op0))
17533 default:
17534 gcc_unreachable ();
17535 case V32QImode:
17536 extract = gen_avx_vextractf128v32qi;
17537 load_unaligned = gen_avx_loaddquv32qi;
17538 store_unaligned = gen_avx_storedquv32qi;
17539 mode = V16QImode;
17540 break;
17541 case V8SFmode:
17542 extract = gen_avx_vextractf128v8sf;
17543 load_unaligned = gen_avx_loadups256;
17544 store_unaligned = gen_avx_storeups256;
17545 mode = V4SFmode;
17546 break;
17547 case V4DFmode:
17548 extract = gen_avx_vextractf128v4df;
17549 load_unaligned = gen_avx_loadupd256;
17550 store_unaligned = gen_avx_storeupd256;
17551 mode = V2DFmode;
17552 break;
17555 if (MEM_P (op1))
17557 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17558 && optimize_insn_for_speed_p ())
17560 rtx r = gen_reg_rtx (mode);
17561 m = adjust_address (op1, mode, 0);
17562 emit_move_insn (r, m);
17563 m = adjust_address (op1, mode, 16);
17564 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17565 emit_move_insn (op0, r);
17567 /* Normal *mov<mode>_internal pattern will handle
17568 unaligned loads just fine if misaligned_operand
17569 is true, and without the UNSPEC it can be combined
17570 with arithmetic instructions. */
17571 else if (misaligned_operand (op1, GET_MODE (op1)))
17572 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17573 else
17574 emit_insn (load_unaligned (op0, op1));
17576 else if (MEM_P (op0))
17578 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17579 && optimize_insn_for_speed_p ())
17581 m = adjust_address (op0, mode, 0);
17582 emit_insn (extract (m, op1, const0_rtx));
17583 m = adjust_address (op0, mode, 16);
17584 emit_insn (extract (m, op1, const1_rtx));
17586 else
17587 emit_insn (store_unaligned (op0, op1));
17589 else
17590 gcc_unreachable ();
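/* Illustrative result (hypothetical operands) of the split load above
   under -mavx256-split-unaligned-load tuning:

       vmovups     mem,            %xmm0        # low 128 bits
       vinsertf128 $1, mem+16, %ymm0, %ymm0     # high 128 bits

   and, symmetrically, the split store uses a 128-bit store of the low half
   followed by "vextractf128 $1" into mem+16.  */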
17593 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17594 straight to ix86_expand_vector_move. */
17595 /* Code generation for scalar reg-reg moves of single and double precision data:
17596 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17597 movaps reg, reg
17598 else
17599 movss reg, reg
17600 if (x86_sse_partial_reg_dependency == true)
17601 movapd reg, reg
17602 else
17603 movsd reg, reg
17605 Code generation for scalar loads of double precision data:
17606 if (x86_sse_split_regs == true)
17607 movlpd mem, reg (gas syntax)
17608 else
17609 movsd mem, reg
17611 Code generation for unaligned packed loads of single precision data
17612 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17613 if (x86_sse_unaligned_move_optimal)
17614 movups mem, reg
17616 if (x86_sse_partial_reg_dependency == true)
17618 xorps reg, reg
17619 movlps mem, reg
17620 movhps mem+8, reg
17622 else
17624 movlps mem, reg
17625 movhps mem+8, reg
17628 Code generation for unaligned packed loads of double precision data
17629 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17630 if (x86_sse_unaligned_move_optimal)
17631 movupd mem, reg
17633 if (x86_sse_split_regs == true)
17635 movlpd mem, reg
17636 movhpd mem+8, reg
17638 else
17640 movsd mem, reg
17641 movhpd mem+8, reg
17645 void
17646 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17648 rtx op0, op1, orig_op0 = NULL_RTX, m;
17649 rtx (*load_unaligned) (rtx, rtx);
17650 rtx (*store_unaligned) (rtx, rtx);
17652 op0 = operands[0];
17653 op1 = operands[1];
17655 if (GET_MODE_SIZE (mode) == 64)
17657 switch (GET_MODE_CLASS (mode))
17659 case MODE_VECTOR_INT:
17660 case MODE_INT:
17661 if (GET_MODE (op0) != V16SImode)
17663 if (!MEM_P (op0))
17665 orig_op0 = op0;
17666 op0 = gen_reg_rtx (V16SImode);
17668 else
17669 op0 = gen_lowpart (V16SImode, op0);
17671 op1 = gen_lowpart (V16SImode, op1);
17672 /* FALLTHRU */
17674 case MODE_VECTOR_FLOAT:
17675 switch (GET_MODE (op0))
17677 default:
17678 gcc_unreachable ();
17679 case V16SImode:
17680 load_unaligned = gen_avx512f_loaddquv16si;
17681 store_unaligned = gen_avx512f_storedquv16si;
17682 break;
17683 case V16SFmode:
17684 load_unaligned = gen_avx512f_loadups512;
17685 store_unaligned = gen_avx512f_storeups512;
17686 break;
17687 case V8DFmode:
17688 load_unaligned = gen_avx512f_loadupd512;
17689 store_unaligned = gen_avx512f_storeupd512;
17690 break;
17693 if (MEM_P (op1))
17694 emit_insn (load_unaligned (op0, op1));
17695 else if (MEM_P (op0))
17696 emit_insn (store_unaligned (op0, op1));
17697 else
17698 gcc_unreachable ();
17699 if (orig_op0)
17700 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17701 break;
17703 default:
17704 gcc_unreachable ();
17707 return;
17710 if (TARGET_AVX
17711 && GET_MODE_SIZE (mode) == 32)
17713 switch (GET_MODE_CLASS (mode))
17715 case MODE_VECTOR_INT:
17716 case MODE_INT:
17717 if (GET_MODE (op0) != V32QImode)
17719 if (!MEM_P (op0))
17721 orig_op0 = op0;
17722 op0 = gen_reg_rtx (V32QImode);
17724 else
17725 op0 = gen_lowpart (V32QImode, op0);
17727 op1 = gen_lowpart (V32QImode, op1);
17728 /* FALLTHRU */
17730 case MODE_VECTOR_FLOAT:
17731 ix86_avx256_split_vector_move_misalign (op0, op1);
17732 if (orig_op0)
17733 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17734 break;
17736 default:
17737 gcc_unreachable ();
17740 return;
17743 if (MEM_P (op1))
17745 /* Normal *mov<mode>_internal pattern will handle
17746 unaligned loads just fine if misaligned_operand
17747 is true, and without the UNSPEC it can be combined
17748 with arithmetic instructions. */
17749 if (TARGET_AVX
17750 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17751 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17752 && misaligned_operand (op1, GET_MODE (op1)))
17753 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17754 /* ??? If we have typed data, then it would appear that using
17755 movdqu is the only way to get unaligned data loaded with
17756 integer type. */
17757 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17759 if (GET_MODE (op0) != V16QImode)
17761 orig_op0 = op0;
17762 op0 = gen_reg_rtx (V16QImode);
17764 op1 = gen_lowpart (V16QImode, op1);
17765 /* We will eventually emit movups based on insn attributes. */
17766 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17767 if (orig_op0)
17768 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17770 else if (TARGET_SSE2 && mode == V2DFmode)
17772 rtx zero;
17774 if (TARGET_AVX
17775 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17776 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17777 || optimize_insn_for_size_p ())
17779 /* We will eventually emit movups based on insn attributes. */
17780 emit_insn (gen_sse2_loadupd (op0, op1));
17781 return;
17784 /* When SSE registers are split into halves, we can avoid
17785 writing to the top half twice. */
17786 if (TARGET_SSE_SPLIT_REGS)
17788 emit_clobber (op0);
17789 zero = op0;
17791 else
17793 /* ??? Not sure about the best option for the Intel chips.
17794 The following would seem to satisfy; the register is
17795 entirely cleared, breaking the dependency chain. We
17796 then store to the upper half, with a dependency depth
17797 of one. A rumor has it that Intel recommends two movsd
17798 followed by an unpacklpd, but this is unconfirmed. And
17799 given that the dependency depth of the unpacklpd would
17800 still be one, I'm not sure why this would be better. */
17801 zero = CONST0_RTX (V2DFmode);
17804 m = adjust_address (op1, DFmode, 0);
17805 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17806 m = adjust_address (op1, DFmode, 8);
17807 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17809 else
17811 rtx t;
17813 if (TARGET_AVX
17814 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17815 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17816 || optimize_insn_for_size_p ())
17818 if (GET_MODE (op0) != V4SFmode)
17820 orig_op0 = op0;
17821 op0 = gen_reg_rtx (V4SFmode);
17823 op1 = gen_lowpart (V4SFmode, op1);
17824 emit_insn (gen_sse_loadups (op0, op1));
17825 if (orig_op0)
17826 emit_move_insn (orig_op0,
17827 gen_lowpart (GET_MODE (orig_op0), op0));
17828 return;
17831 if (mode != V4SFmode)
17832 t = gen_reg_rtx (V4SFmode);
17833 else
17834 t = op0;
17836 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17837 emit_move_insn (t, CONST0_RTX (V4SFmode));
17838 else
17839 emit_clobber (t);
17841 m = adjust_address (op1, V2SFmode, 0);
17842 emit_insn (gen_sse_loadlps (t, t, m));
17843 m = adjust_address (op1, V2SFmode, 8);
17844 emit_insn (gen_sse_loadhps (t, t, m));
17845 if (mode != V4SFmode)
17846 emit_move_insn (op0, gen_lowpart (mode, t));
17849 else if (MEM_P (op0))
17851 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17853 op0 = gen_lowpart (V16QImode, op0);
17854 op1 = gen_lowpart (V16QImode, op1);
17855 /* We will eventually emit movups based on insn attributes. */
17856 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17858 else if (TARGET_SSE2 && mode == V2DFmode)
17860 if (TARGET_AVX
17861 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17862 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17863 || optimize_insn_for_size_p ())
17864 /* We will eventually emit movups based on insn attributes. */
17865 emit_insn (gen_sse2_storeupd (op0, op1));
17866 else
17868 m = adjust_address (op0, DFmode, 0);
17869 emit_insn (gen_sse2_storelpd (m, op1));
17870 m = adjust_address (op0, DFmode, 8);
17871 emit_insn (gen_sse2_storehpd (m, op1));
17874 else
17876 if (mode != V4SFmode)
17877 op1 = gen_lowpart (V4SFmode, op1);
17879 if (TARGET_AVX
17880 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17881 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17882 || optimize_insn_for_size_p ())
17884 op0 = gen_lowpart (V4SFmode, op0);
17885 emit_insn (gen_sse_storeups (op0, op1));
17887 else
17889 m = adjust_address (op0, V2SFmode, 0);
17890 emit_insn (gen_sse_storelps (m, op1));
17891 m = adjust_address (op0, V2SFmode, 8);
17892 emit_insn (gen_sse_storehps (m, op1));
17896 else
17897 gcc_unreachable ();
17900 /* Helper function of ix86_fixup_binary_operands to canonicalize
17901 operand order. Returns true if the operands should be swapped. */
17903 static bool
17904 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17905 rtx operands[])
17907 rtx dst = operands[0];
17908 rtx src1 = operands[1];
17909 rtx src2 = operands[2];
17911 /* If the operation is not commutative, we can't do anything. */
17912 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17913 return false;
17915 /* Highest priority is that src1 should match dst. */
17916 if (rtx_equal_p (dst, src1))
17917 return false;
17918 if (rtx_equal_p (dst, src2))
17919 return true;
17921 /* Next highest priority is that immediate constants come second. */
17922 if (immediate_operand (src2, mode))
17923 return false;
17924 if (immediate_operand (src1, mode))
17925 return true;
17927 /* Lowest priority is that memory references should come second. */
17928 if (MEM_P (src2))
17929 return false;
17930 if (MEM_P (src1))
17931 return true;
17933 return false;
17937 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17938 destination to use for the operation. If different from the true
17939 destination in operands[0], a copy operation will be required. */
17942 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17943 rtx operands[])
17945 rtx dst = operands[0];
17946 rtx src1 = operands[1];
17947 rtx src2 = operands[2];
17949 /* Canonicalize operand order. */
17950 if (ix86_swap_binary_operands_p (code, mode, operands))
17952 /* It is invalid to swap operands of different modes. */
17953 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17955 std::swap (src1, src2);
17958 /* Both source operands cannot be in memory. */
17959 if (MEM_P (src1) && MEM_P (src2))
17961 /* Optimization: Only read from memory once. */
17962 if (rtx_equal_p (src1, src2))
17964 src2 = force_reg (mode, src2);
17965 src1 = src2;
17967 else if (rtx_equal_p (dst, src1))
17968 src2 = force_reg (mode, src2);
17969 else
17970 src1 = force_reg (mode, src1);
17973 /* If the destination is memory, and we do not have matching source
17974 operands, do things in registers. */
17975 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17976 dst = gen_reg_rtx (mode);
17978 /* Source 1 cannot be a constant. */
17979 if (CONSTANT_P (src1))
17980 src1 = force_reg (mode, src1);
17982 /* Source 1 cannot be a non-matching memory. */
17983 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17984 src1 = force_reg (mode, src1);
17986 /* Improve address combine. */
17987 if (code == PLUS
17988 && GET_MODE_CLASS (mode) == MODE_INT
17989 && MEM_P (src2))
17990 src2 = force_reg (mode, src2);
17992 operands[1] = src1;
17993 operands[2] = src2;
17994 return dst;
17997 /* Similarly, but assume that the destination has already been
17998 set up properly. */
18000 void
18001 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18002 machine_mode mode, rtx operands[])
18004 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18005 gcc_assert (dst == operands[0]);
18008 /* Attempt to expand a binary operator. Make the expansion closer to the
18009 actual machine than just general_operand, which will allow 3 separate
18010 memory references (one output, two input) in a single insn. */
18012 void
18013 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18014 rtx operands[])
18016 rtx src1, src2, dst, op, clob;
18018 dst = ix86_fixup_binary_operands (code, mode, operands);
18019 src1 = operands[1];
18020 src2 = operands[2];
18022 /* Emit the instruction. */
18024 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18025 if (reload_in_progress)
18027 /* Reload doesn't know about the flags register, and doesn't know that
18028 it doesn't want to clobber it. We can only do this with PLUS. */
18029 gcc_assert (code == PLUS);
18030 emit_insn (op);
18032 else if (reload_completed
18033 && code == PLUS
18034 && !rtx_equal_p (dst, src1))
18036 /* This is going to be an LEA; avoid splitting it later. */
18037 emit_insn (op);
18039 else
18041 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18042 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18045 /* Fix up the destination if needed. */
18046 if (dst != operands[0])
18047 emit_move_insn (operands[0], dst);
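/* A hedged usage sketch (as called from a machine-description expander,
   not an exact quote):

       ix86_expand_binary_operator (PLUS, SImode, operands);

   This canonicalizes operands[1]/operands[2], emits the SET together with
   a flags clobber when one is needed, and copies the result back to
   operands[0] if a temporary destination had to be used.  */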
18050 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18051 the given OPERANDS. */
18053 void
18054 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18055 rtx operands[])
18057 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18058 if (GET_CODE (operands[1]) == SUBREG)
18060 op1 = operands[1];
18061 op2 = operands[2];
18063 else if (GET_CODE (operands[2]) == SUBREG)
18065 op1 = operands[2];
18066 op2 = operands[1];
18068 /* Optimize (__m128i) d | (__m128i) e and similar code
18069 when d and e are float vectors into a float vector logical
18070 insn. In C/C++ without using intrinsics there is no other way
18071 to express a vector logical operation on float vectors than
18072 to cast them temporarily to integer vectors. */
18073 if (op1
18074 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18075 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18076 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18077 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18078 && SUBREG_BYTE (op1) == 0
18079 && (GET_CODE (op2) == CONST_VECTOR
18080 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18081 && SUBREG_BYTE (op2) == 0))
18082 && can_create_pseudo_p ())
18084 rtx dst;
18085 switch (GET_MODE (SUBREG_REG (op1)))
18087 case V4SFmode:
18088 case V8SFmode:
18089 case V16SFmode:
18090 case V2DFmode:
18091 case V4DFmode:
18092 case V8DFmode:
18093 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18094 if (GET_CODE (op2) == CONST_VECTOR)
18096 op2 = gen_lowpart (GET_MODE (dst), op2);
18097 op2 = force_reg (GET_MODE (dst), op2);
18099 else
18101 op1 = operands[1];
18102 op2 = SUBREG_REG (operands[2]);
18103 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18104 op2 = force_reg (GET_MODE (dst), op2);
18106 op1 = SUBREG_REG (op1);
18107 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18108 op1 = force_reg (GET_MODE (dst), op1);
18109 emit_insn (gen_rtx_SET (VOIDmode, dst,
18110 gen_rtx_fmt_ee (code, GET_MODE (dst),
18111 op1, op2)));
18112 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18113 return;
18114 default:
18115 break;
18118 if (!nonimmediate_operand (operands[1], mode))
18119 operands[1] = force_reg (mode, operands[1]);
18120 if (!nonimmediate_operand (operands[2], mode))
18121 operands[2] = force_reg (mode, operands[2]);
18122 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18123 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18124 gen_rtx_fmt_ee (code, mode, operands[1],
18125 operands[2])));
18128 /* Return TRUE or FALSE depending on whether the binary operator meets the
18129 appropriate constraints. */
18131 bool
18132 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18133 rtx operands[3])
18135 rtx dst = operands[0];
18136 rtx src1 = operands[1];
18137 rtx src2 = operands[2];
18139 /* Both source operands cannot be in memory. */
18140 if (MEM_P (src1) && MEM_P (src2))
18141 return false;
18143 /* Canonicalize operand order for commutative operators. */
18144 if (ix86_swap_binary_operands_p (code, mode, operands))
18145 std::swap (src1, src2);
18147 /* If the destination is memory, we must have a matching source operand. */
18148 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18149 return false;
18151 /* Source 1 cannot be a constant. */
18152 if (CONSTANT_P (src1))
18153 return false;
18155 /* Source 1 cannot be a non-matching memory. */
18156 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18157 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18158 return (code == AND
18159 && (mode == HImode
18160 || mode == SImode
18161 || (TARGET_64BIT && mode == DImode))
18162 && satisfies_constraint_L (src2));
18164 return true;
18167 /* Attempt to expand a unary operator. Make the expansion closer to the
18168 actual machine than just general_operand, which will allow 2 separate
18169 memory references (one output, one input) in a single insn. */
18171 void
18172 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18173 rtx operands[])
18175 bool matching_memory = false;
18176 rtx src, dst, op, clob;
18178 dst = operands[0];
18179 src = operands[1];
18181 /* If the destination is memory, and we do not have matching source
18182 operands, do things in registers. */
18183 if (MEM_P (dst))
18185 if (rtx_equal_p (dst, src))
18186 matching_memory = true;
18187 else
18188 dst = gen_reg_rtx (mode);
18191 /* When source operand is memory, destination must match. */
18192 if (MEM_P (src) && !matching_memory)
18193 src = force_reg (mode, src);
18195 /* Emit the instruction. */
18197 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18198 if (reload_in_progress || code == NOT)
18200 /* Reload doesn't know about the flags register, and doesn't know that
18201 it doesn't want to clobber it. */
18202 gcc_assert (code == NOT);
18203 emit_insn (op);
18205 else
18207 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18208 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18211 /* Fix up the destination if needed. */
18212 if (dst != operands[0])
18213 emit_move_insn (operands[0], dst);
18216 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18217 divisor are within the range [0-255]. */
18219 void
18220 ix86_split_idivmod (machine_mode mode, rtx operands[],
18221 bool signed_p)
18223 rtx_code_label *end_label, *qimode_label;
18224 rtx insn, div, mod;
18225 rtx scratch, tmp0, tmp1, tmp2;
18226 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18227 rtx (*gen_zero_extend) (rtx, rtx);
18228 rtx (*gen_test_ccno_1) (rtx, rtx);
18230 switch (mode)
18232 case SImode:
18233 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18234 gen_test_ccno_1 = gen_testsi_ccno_1;
18235 gen_zero_extend = gen_zero_extendqisi2;
18236 break;
18237 case DImode:
18238 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18239 gen_test_ccno_1 = gen_testdi_ccno_1;
18240 gen_zero_extend = gen_zero_extendqidi2;
18241 break;
18242 default:
18243 gcc_unreachable ();
18246 end_label = gen_label_rtx ();
18247 qimode_label = gen_label_rtx ();
18249 scratch = gen_reg_rtx (mode);
18251 /* Use 8bit unsigned divmod if dividend and divisor are within
18252 the range [0-255]. */
18253 emit_move_insn (scratch, operands[2]);
18254 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18255 scratch, 1, OPTAB_DIRECT);
18256 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18257 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18258 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18259 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18260 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18261 pc_rtx);
18262 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18263 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18264 JUMP_LABEL (insn) = qimode_label;
18266 /* Generate the original signed/unsigned divmod. */
18267 div = gen_divmod4_1 (operands[0], operands[1],
18268 operands[2], operands[3]);
18269 emit_insn (div);
18271 /* Branch to the end. */
18272 emit_jump_insn (gen_jump (end_label));
18273 emit_barrier ();
18275 /* Generate 8bit unsigned divide. */
18276 emit_label (qimode_label);
18277 /* Don't use operands[0] for result of 8bit divide since not all
18278 registers support QImode ZERO_EXTRACT. */
18279 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18280 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18281 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18282 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18284 if (signed_p)
18286 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18287 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18289 else
18291 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18292 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18295 /* Extract remainder from AH. */
18296 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18297 if (REG_P (operands[1]))
18298 insn = emit_move_insn (operands[1], tmp1);
18299 else
18301 /* Need a new scratch register since the old one holds the result
18302 of the 8bit divide. */
18303 scratch = gen_reg_rtx (mode);
18304 emit_move_insn (scratch, tmp1);
18305 insn = emit_move_insn (operands[1], scratch);
18307 set_unique_reg_note (insn, REG_EQUAL, mod);
18309 /* Zero extend quotient from AL. */
18310 tmp1 = gen_lowpart (QImode, tmp0);
18311 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18312 set_unique_reg_note (insn, REG_EQUAL, div);
18314 emit_label (end_label);
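/* Control-flow shape of what is emitted above (an illustrative sketch,
   not a literal RTL dump):

       if (((dividend | divisor) & ~0xFF) == 0)
         use a single 8bit unsigned divide: AL = quotient, AH = remainder;
       else
         use the full-width signed/unsigned divmod;              */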
18317 #define LEA_MAX_STALL (3)
18318 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18320 /* Increase the given DISTANCE in half-cycles according to
18321 dependencies between the PREV and NEXT instructions.
18322 Add 1 half-cycle if there is no dependency and
18323 go to the next cycle if there is some dependency. */
18325 static unsigned int
18326 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18328 df_ref def, use;
18330 if (!prev || !next)
18331 return distance + (distance & 1) + 2;
18333 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18334 return distance + 1;
18336 FOR_EACH_INSN_USE (use, next)
18337 FOR_EACH_INSN_DEF (def, prev)
18338 if (!DF_REF_IS_ARTIFICIAL (def)
18339 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18340 return distance + (distance & 1) + 2;
18342 return distance + 1;
18345 /* Check if instruction INSN defines register number
18346 REGNO1 or REGNO2. */
18348 static bool
18349 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18350 rtx insn)
18352 df_ref def;
18354 FOR_EACH_INSN_DEF (def, insn)
18355 if (DF_REF_REG_DEF_P (def)
18356 && !DF_REF_IS_ARTIFICIAL (def)
18357 && (regno1 == DF_REF_REGNO (def)
18358 || regno2 == DF_REF_REGNO (def)))
18359 return true;
18361 return false;
18364 /* Check if instruction INSN uses register number
18365 REGNO as part of an address expression. */
18367 static bool
18368 insn_uses_reg_mem (unsigned int regno, rtx insn)
18370 df_ref use;
18372 FOR_EACH_INSN_USE (use, insn)
18373 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18374 return true;
18376 return false;
18379 /* Search backward for a non-AGU definition of register number REGNO1
18380 or REGNO2 in the basic block, starting from instruction
18381 START and going up to the head of the basic block or to instruction INSN.
18383 Set *FOUND to true if a definition was found
18384 and to false otherwise.
18386 The distance in half-cycles between START and the found instruction (or the
18387 head of the BB) is added to DISTANCE and returned. */
18389 static int
18390 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18391 rtx_insn *insn, int distance,
18392 rtx_insn *start, bool *found)
18394 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18395 rtx_insn *prev = start;
18396 rtx_insn *next = NULL;
18398 *found = false;
18400 while (prev
18401 && prev != insn
18402 && distance < LEA_SEARCH_THRESHOLD)
18404 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18406 distance = increase_distance (prev, next, distance);
18407 if (insn_defines_reg (regno1, regno2, prev))
18409 if (recog_memoized (prev) < 0
18410 || get_attr_type (prev) != TYPE_LEA)
18412 *found = true;
18413 return distance;
18417 next = prev;
18419 if (prev == BB_HEAD (bb))
18420 break;
18422 prev = PREV_INSN (prev);
18425 return distance;
18428 /* Search backward for a non-AGU definition of register number REGNO1
18429 or REGNO2 in INSN's basic block until we
18430 1. pass LEA_SEARCH_THRESHOLD instructions, or
18431 2. reach a neighbouring BB's boundary, or
18432 3. reach an AGU definition.
18433 Return the distance between the non-AGU definition point and INSN.
18434 If there is no definition point, return -1. */
18436 static int
18437 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18438 rtx_insn *insn)
18440 basic_block bb = BLOCK_FOR_INSN (insn);
18441 int distance = 0;
18442 bool found = false;
18444 if (insn != BB_HEAD (bb))
18445 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18446 distance, PREV_INSN (insn),
18447 &found);
18449 if (!found && distance < LEA_SEARCH_THRESHOLD)
18451 edge e;
18452 edge_iterator ei;
18453 bool simple_loop = false;
18455 FOR_EACH_EDGE (e, ei, bb->preds)
18456 if (e->src == bb)
18458 simple_loop = true;
18459 break;
18462 if (simple_loop)
18463 distance = distance_non_agu_define_in_bb (regno1, regno2,
18464 insn, distance,
18465 BB_END (bb), &found);
18466 else
18468 int shortest_dist = -1;
18469 bool found_in_bb = false;
18471 FOR_EACH_EDGE (e, ei, bb->preds)
18473 int bb_dist
18474 = distance_non_agu_define_in_bb (regno1, regno2,
18475 insn, distance,
18476 BB_END (e->src),
18477 &found_in_bb);
18478 if (found_in_bb)
18480 if (shortest_dist < 0)
18481 shortest_dist = bb_dist;
18482 else if (bb_dist > 0)
18483 shortest_dist = MIN (bb_dist, shortest_dist);
18485 found = true;
18489 distance = shortest_dist;
18493 /* get_attr_type may modify recog data. We want to make sure
18494 that recog data is valid for instruction INSN, on which
18495 distance_non_agu_define is called. INSN is unchanged here. */
18496 extract_insn_cached (insn);
18498 if (!found)
18499 return -1;
18501 return distance >> 1;
18504 /* Return the distance in half-cycles between INSN and the next
18505 insn that uses register number REGNO in a memory address, added
18506 to DISTANCE. Return -1 if REGNO is set.
18508 Set *FOUND to true if a register use was found and to
18509 false otherwise.
18510 Set *REDEFINED to true if a register redefinition was
18511 found and to false otherwise. */
18513 static int
18514 distance_agu_use_in_bb (unsigned int regno,
18515 rtx_insn *insn, int distance, rtx_insn *start,
18516 bool *found, bool *redefined)
18518 basic_block bb = NULL;
18519 rtx_insn *next = start;
18520 rtx_insn *prev = NULL;
18522 *found = false;
18523 *redefined = false;
18525 if (start != NULL_RTX)
18527 bb = BLOCK_FOR_INSN (start);
18528 if (start != BB_HEAD (bb))
18529 /* If insn and start belong to the same bb, set prev to insn,
18530 so the call to increase_distance will increase the distance
18531 between insns by 1. */
18532 prev = insn;
18535 while (next
18536 && next != insn
18537 && distance < LEA_SEARCH_THRESHOLD)
18539 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18541 distance = increase_distance(prev, next, distance);
18542 if (insn_uses_reg_mem (regno, next))
18544 /* Return DISTANCE if OP0 is used in memory
18545 address in NEXT. */
18546 *found = true;
18547 return distance;
18550 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18552 /* Return -1 if OP0 is set in NEXT. */
18553 *redefined = true;
18554 return -1;
18557 prev = next;
18560 if (next == BB_END (bb))
18561 break;
18563 next = NEXT_INSN (next);
18566 return distance;
18569 /* Return the distance between INSN and the next insn that uses
18570 register number REGNO0 in a memory address. Return -1 if no such
18571 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18573 static int
18574 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18576 basic_block bb = BLOCK_FOR_INSN (insn);
18577 int distance = 0;
18578 bool found = false;
18579 bool redefined = false;
18581 if (insn != BB_END (bb))
18582 distance = distance_agu_use_in_bb (regno0, insn, distance,
18583 NEXT_INSN (insn),
18584 &found, &redefined);
18586 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18588 edge e;
18589 edge_iterator ei;
18590 bool simple_loop = false;
18592 FOR_EACH_EDGE (e, ei, bb->succs)
18593 if (e->dest == bb)
18595 simple_loop = true;
18596 break;
18599 if (simple_loop)
18600 distance = distance_agu_use_in_bb (regno0, insn,
18601 distance, BB_HEAD (bb),
18602 &found, &redefined);
18603 else
18605 int shortest_dist = -1;
18606 bool found_in_bb = false;
18607 bool redefined_in_bb = false;
18609 FOR_EACH_EDGE (e, ei, bb->succs)
18611 int bb_dist
18612 = distance_agu_use_in_bb (regno0, insn,
18613 distance, BB_HEAD (e->dest),
18614 &found_in_bb, &redefined_in_bb);
18615 if (found_in_bb)
18617 if (shortest_dist < 0)
18618 shortest_dist = bb_dist;
18619 else if (bb_dist > 0)
18620 shortest_dist = MIN (bb_dist, shortest_dist);
18622 found = true;
18626 distance = shortest_dist;
18630 if (!found || redefined)
18631 return -1;
18633 return distance >> 1;
18636 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18637 there is a dilemma of choosing LEA or ADD.
18638 Negative value: ADD is preferred over LEA
18639 Zero: Neutral
18640 Positive value: LEA is preferred over ADD. */
18641 #define IX86_LEA_PRIORITY 0
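/* Illustrative sketch (not part of the original sources; registers are
   hypothetical): for a plain three-operand add such as "a = b + c" the
   two candidate sequences are

       lea    (%rbx,%rcx), %rax        # one AGU op, leaves the flags untouched

   versus the split ALU form

       mov    %rbx, %rax
       add    %rcx, %rax               # clobbers the flags

   ix86_lea_outperforms below arbitrates between them using the distance
   to the nearest non-AGU definition of the sources and the distance to
   the next use of the result inside a memory address.  */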
18643 /* Return true if use of the lea INSN has a performance advantage
18644 over a sequence of instructions. The instruction sequence has
18645 SPLIT_COST cycles higher latency than the lea latency. */
18647 static bool
18648 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18649 unsigned int regno2, int split_cost, bool has_scale)
18651 int dist_define, dist_use;
18653 /* For Silvermont, if a 2-source or 3-source LEA is used for
18654 non-destructive destination purposes, or due to wanting the
18655 ability to use SCALE, the use of LEA is justified. */
18656 if (TARGET_SILVERMONT || TARGET_INTEL)
18658 if (has_scale)
18659 return true;
18660 if (split_cost < 1)
18661 return false;
18662 if (regno0 == regno1 || regno0 == regno2)
18663 return false;
18664 return true;
18667 dist_define = distance_non_agu_define (regno1, regno2, insn);
18668 dist_use = distance_agu_use (regno0, insn);
18670 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18672 /* If there is no non-AGU operand definition, no AGU
18673 operand usage and the split cost is 0, then both the lea
18674 and non-lea variants have the same priority. Currently
18675 we prefer lea for 64-bit code and non-lea for 32-bit
18676 code. */
18677 if (dist_use < 0 && split_cost == 0)
18678 return TARGET_64BIT || IX86_LEA_PRIORITY;
18679 else
18680 return true;
18683 /* With a longer definition distance, lea is more preferable.
18684 Here we adjust it to take into account the splitting cost and
18685 the lea priority. */
18686 dist_define += split_cost + IX86_LEA_PRIORITY;
18688 /* If there is no use in a memory address then we just check
18689 that the split cost exceeds the AGU stall. */
18690 if (dist_use < 0)
18691 return dist_define > LEA_MAX_STALL;
18693 /* If this insn has both a backward non-AGU dependence and a forward
18694 AGU dependence, the one with the shorter distance takes effect. */
18695 return dist_define >= dist_use;
18698 /* Return true if it is legal for INSN to clobber the flags register
18699 and false otherwise. */
18701 static bool
18702 ix86_ok_to_clobber_flags (rtx_insn *insn)
18704 basic_block bb = BLOCK_FOR_INSN (insn);
18705 df_ref use;
18706 bitmap live;
18708 while (insn)
18710 if (NONDEBUG_INSN_P (insn))
18712 FOR_EACH_INSN_USE (use, insn)
18713 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18714 return false;
18716 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18717 return true;
18720 if (insn == BB_END (bb))
18721 break;
18723 insn = NEXT_INSN (insn);
18726 live = df_get_live_out (bb);
18727 return !REGNO_REG_SET_P (live, FLAGS_REG);
18730 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18731 move and add to avoid AGU stalls. */
18733 bool
18734 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18736 unsigned int regno0, regno1, regno2;
18738 /* Check if we need to optimize. */
18739 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18740 return false;
18742 /* Check it is correct to split here. */
18743 if (!ix86_ok_to_clobber_flags (insn))
18744 return false;
18746 regno0 = true_regnum (operands[0]);
18747 regno1 = true_regnum (operands[1]);
18748 regno2 = true_regnum (operands[2]);
18750 /* We only need to split adds with a non-destructive
18751 destination operand. */
18752 if (regno0 == regno1 || regno0 == regno2)
18753 return false;
18754 else
18755 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18758 /* Return true if we should emit a lea instruction instead of a mov
18759 instruction. */
18761 bool
18762 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18764 unsigned int regno0, regno1;
18766 /* Check if we need to optimize. */
18767 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18768 return false;
18770 /* Use lea for reg to reg moves only. */
18771 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18772 return false;
18774 regno0 = true_regnum (operands[0]);
18775 regno1 = true_regnum (operands[1]);
18777 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18780 /* Return true if we need to split lea into a sequence of
18781 instructions to avoid AGU stalls. */
18783 bool
18784 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18786 unsigned int regno0, regno1, regno2;
18787 int split_cost;
18788 struct ix86_address parts;
18789 int ok;
18791 /* Check we need to optimize. */
18792 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18793 return false;
18795 /* The "at least two components" test below might not catch simple
18796 move or zero extension insns if parts.base is non-NULL and parts.disp
18797 is const0_rtx as the only components in the address, e.g. if the
18798 register is %rbp or %r13. As this test is much cheaper and moves or
18799 zero extensions are the common case, do this check first. */
18800 if (REG_P (operands[1])
18801 || (SImode_address_operand (operands[1], VOIDmode)
18802 && REG_P (XEXP (operands[1], 0))))
18803 return false;
18805 /* Check if it is OK to split here. */
18806 if (!ix86_ok_to_clobber_flags (insn))
18807 return false;
18809 ok = ix86_decompose_address (operands[1], &parts);
18810 gcc_assert (ok);
18812 /* There should be at least two components in the address. */
18813 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18814 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18815 return false;
18817 /* We should not split into add if a non-legitimate PIC
18818 operand is used as the displacement. */
18819 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18820 return false;
18822 regno0 = true_regnum (operands[0]);
18823 regno1 = INVALID_REGNUM;
18824 regno2 = INVALID_REGNUM;
18826 if (parts.base)
18827 regno1 = true_regnum (parts.base);
18828 if (parts.index)
18829 regno2 = true_regnum (parts.index);
18831 split_cost = 0;
18833 /* Compute how many cycles we will add to the execution time
18834 if we split the lea into a sequence of instructions. */
18835 if (parts.base || parts.index)
18837 /* Have to use a mov instruction if the non-destructive
18838 destination form is used. */
18839 if (regno1 != regno0 && regno2 != regno0)
18840 split_cost += 1;
18842 /* Have to add index to base if both exist. */
18843 if (parts.base && parts.index)
18844 split_cost += 1;
18846 /* Have to use shift and adds if scale is 2 or greater. */
18847 if (parts.scale > 1)
18849 if (regno0 != regno1)
18850 split_cost += 1;
18851 else if (regno2 == regno0)
18852 split_cost += 4;
18853 else
18854 split_cost += parts.scale;
18857 /* Have to use an add instruction with an immediate if
18858 disp is non-zero. */
18859 if (parts.disp && parts.disp != const0_rtx)
18860 split_cost += 1;
18862 /* Subtract the price of lea. */
18863 split_cost -= 1;
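/* Worked example (hypothetical operands, not from the original sources):
   for "lea 0x4(%rbx,%rcx,2), %rax", where %rax differs from both sources,
   the split needs a mov (+1), an add combining base and index (+1), a
   shift for the scale (+1) and an add of the displacement (+1); after
   subtracting the lea itself (-1) this gives split_cost = 3.  */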
18866 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18867 parts.scale > 1);
18870 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
18871 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18873 static void
18874 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18875 rtx dst, rtx src)
18877 rtx op, clob;
18879 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18880 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18882 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
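/* Sketch of the emitted RTL (illustrative): for CODE == PLUS in DImode
   this produces something like

       (parallel [(set (reg:DI dst)
                       (plus:DI (reg:DI dst) (reg:DI src)))
                  (clobber (reg:CC FLAGS_REG))])

   which is intended to match the two-address integer ALU patterns.  */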
18885 /* Return true if the definition of regno1 is closer to INSN than the definition of regno2. */
18887 static bool
18888 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18890 rtx_insn *prev = insn;
18891 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18893 if (insn == start)
18894 return false;
18895 while (prev && prev != start)
18897 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18899 prev = PREV_INSN (prev);
18900 continue;
18902 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18903 return true;
18904 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18905 return false;
18906 prev = PREV_INSN (prev);
18909 /* None of the regs is defined in the bb. */
18910 return false;
18913 /* Split a lea instruction into a sequence of instructions
18914 which are executed on the ALU to avoid AGU stalls.
18915 It is assumed that it is allowed to clobber the flags register
18916 at the lea position. */
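/* For instance (hypothetical operands, not from the original sources),
   "lea 0x4(%rbx,%rcx,2), %rax" would be rewritten roughly as

       mov    %rcx, %rax
       sal    $1, %rax
       add    %rbx, %rax
       add    $0x4, %rax

   when %rax is distinct from both %rbx and %rcx.  */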
18918 void
18919 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18921 unsigned int regno0, regno1, regno2;
18922 struct ix86_address parts;
18923 rtx target, tmp;
18924 int ok, adds;
18926 ok = ix86_decompose_address (operands[1], &parts);
18927 gcc_assert (ok);
18929 target = gen_lowpart (mode, operands[0]);
18931 regno0 = true_regnum (target);
18932 regno1 = INVALID_REGNUM;
18933 regno2 = INVALID_REGNUM;
18935 if (parts.base)
18937 parts.base = gen_lowpart (mode, parts.base);
18938 regno1 = true_regnum (parts.base);
18941 if (parts.index)
18943 parts.index = gen_lowpart (mode, parts.index);
18944 regno2 = true_regnum (parts.index);
18947 if (parts.disp)
18948 parts.disp = gen_lowpart (mode, parts.disp);
18950 if (parts.scale > 1)
18952 /* Case r1 = r1 + ... */
18953 if (regno1 == regno0)
18955 /* If we have a case r1 = r1 + C * r2 then we
18956 would have to use multiplication, which is very
18957 expensive. Assume the cost model is wrong if we
18958 reach such a case here. */
18959 gcc_assert (regno2 != regno0);
18961 for (adds = parts.scale; adds > 0; adds--)
18962 ix86_emit_binop (PLUS, mode, target, parts.index);
18964 else
18966 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18967 if (regno0 != regno2)
18968 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18970 /* Use shift for scaling. */
18971 ix86_emit_binop (ASHIFT, mode, target,
18972 GEN_INT (exact_log2 (parts.scale)));
18974 if (parts.base)
18975 ix86_emit_binop (PLUS, mode, target, parts.base);
18977 if (parts.disp && parts.disp != const0_rtx)
18978 ix86_emit_binop (PLUS, mode, target, parts.disp);
18981 else if (!parts.base && !parts.index)
18983 gcc_assert (parts.disp);
18984 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18986 else
18988 if (!parts.base)
18990 if (regno0 != regno2)
18991 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18993 else if (!parts.index)
18995 if (regno0 != regno1)
18996 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18998 else
19000 if (regno0 == regno1)
19001 tmp = parts.index;
19002 else if (regno0 == regno2)
19003 tmp = parts.base;
19004 else
19006 rtx tmp1;
19008 /* Find the better operand for the SET instruction, depending
19009 on which definition is farther from the insn. */
19010 if (find_nearest_reg_def (insn, regno1, regno2))
19011 tmp = parts.index, tmp1 = parts.base;
19012 else
19013 tmp = parts.base, tmp1 = parts.index;
19015 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19017 if (parts.disp && parts.disp != const0_rtx)
19018 ix86_emit_binop (PLUS, mode, target, parts.disp);
19020 ix86_emit_binop (PLUS, mode, target, tmp1);
19021 return;
19024 ix86_emit_binop (PLUS, mode, target, tmp);
19027 if (parts.disp && parts.disp != const0_rtx)
19028 ix86_emit_binop (PLUS, mode, target, parts.disp);
19032 /* Return true if it is ok to optimize an ADD operation to a LEA
19033 operation to avoid flag register consumption. For most processors,
19034 ADD is faster than LEA. For processors like BONNELL, if the
19035 destination register of the LEA holds an actual address which will be
19036 used soon, LEA is better, otherwise ADD is better. */
19038 bool
19039 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19041 unsigned int regno0 = true_regnum (operands[0]);
19042 unsigned int regno1 = true_regnum (operands[1]);
19043 unsigned int regno2 = true_regnum (operands[2]);
19045 /* If a = b + c with (a != b && a != c), we must use the lea form. */
19046 if (regno0 != regno1 && regno0 != regno2)
19047 return true;
19049 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19050 return false;
19052 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19055 /* Return true if destination reg of SET_BODY is shift count of
19056 USE_BODY. */
19058 static bool
19059 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19061 rtx set_dest;
19062 rtx shift_rtx;
19063 int i;
19065 /* Retrieve destination of SET_BODY. */
19066 switch (GET_CODE (set_body))
19068 case SET:
19069 set_dest = SET_DEST (set_body);
19070 if (!set_dest || !REG_P (set_dest))
19071 return false;
19072 break;
19073 case PARALLEL:
19074 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19075 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19076 use_body))
19077 return true;
19078 default:
19079 return false;
19080 break;
19083 /* Retrieve shift count of USE_BODY. */
19084 switch (GET_CODE (use_body))
19086 case SET:
19087 shift_rtx = XEXP (use_body, 1);
19088 break;
19089 case PARALLEL:
19090 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19091 if (ix86_dep_by_shift_count_body (set_body,
19092 XVECEXP (use_body, 0, i)))
19093 return true;
19094 default:
19095 return false;
19096 break;
19099 if (shift_rtx
19100 && (GET_CODE (shift_rtx) == ASHIFT
19101 || GET_CODE (shift_rtx) == LSHIFTRT
19102 || GET_CODE (shift_rtx) == ASHIFTRT
19103 || GET_CODE (shift_rtx) == ROTATE
19104 || GET_CODE (shift_rtx) == ROTATERT))
19106 rtx shift_count = XEXP (shift_rtx, 1);
19108 /* Return true if shift count is dest of SET_BODY. */
19109 if (REG_P (shift_count))
19111 /* Add this check since the function can be invoked before register
19112 allocation in the pre-reload scheduler. */
19113 if (reload_completed
19114 && true_regnum (set_dest) == true_regnum (shift_count))
19115 return true;
19116 else if (REGNO (set_dest) == REGNO (shift_count))
19117 return true;
19121 return false;
19124 /* Return true if destination reg of SET_INSN is shift count of
19125 USE_INSN. */
19127 bool
19128 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19130 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19131 PATTERN (use_insn));
19134 /* Return TRUE or FALSE depending on whether the unary operator meets the
19135 appropriate constraints. */
19137 bool
19138 ix86_unary_operator_ok (enum rtx_code,
19139 machine_mode,
19140 rtx operands[2])
19142 /* If one of operands is memory, source and destination must match. */
19143 if ((MEM_P (operands[0])
19144 || MEM_P (operands[1]))
19145 && ! rtx_equal_p (operands[0], operands[1]))
19146 return false;
19147 return true;
19150 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19151 are ok, keeping in mind the possible movddup alternative. */
19153 bool
19154 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19156 if (MEM_P (operands[0]))
19157 return rtx_equal_p (operands[0], operands[1 + high]);
19158 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19159 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19160 return true;
19163 /* Post-reload splitter for converting an SF or DFmode value in an
19164 SSE register into an unsigned SImode value. */
19166 void
19167 ix86_split_convert_uns_si_sse (rtx operands[])
19169 machine_mode vecmode;
19170 rtx value, large, zero_or_two31, input, two31, x;
19172 large = operands[1];
19173 zero_or_two31 = operands[2];
19174 input = operands[3];
19175 two31 = operands[4];
19176 vecmode = GET_MODE (large);
19177 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19179 /* Load up the value into the low element. We must ensure that the other
19180 elements are valid floats -- zero is the easiest such value. */
19181 if (MEM_P (input))
19183 if (vecmode == V4SFmode)
19184 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19185 else
19186 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19188 else
19190 input = gen_rtx_REG (vecmode, REGNO (input));
19191 emit_move_insn (value, CONST0_RTX (vecmode));
19192 if (vecmode == V4SFmode)
19193 emit_insn (gen_sse_movss (value, value, input));
19194 else
19195 emit_insn (gen_sse2_movsd (value, value, input));
19198 emit_move_insn (large, two31);
19199 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19201 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19202 emit_insn (gen_rtx_SET (VOIDmode, large, x));
19204 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19205 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19207 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19208 emit_insn (gen_rtx_SET (VOIDmode, value, x));
19210 large = gen_rtx_REG (V4SImode, REGNO (large));
19211 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19213 x = gen_rtx_REG (V4SImode, REGNO (value));
19214 if (vecmode == V4SFmode)
19215 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19216 else
19217 emit_insn (gen_sse2_cvttpd2dq (x, value));
19218 value = x;
19220 emit_insn (gen_xorv4si3 (value, value, large));
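/* In effect (sketch of the intent): the result computed above is
   (unsigned) x  =  x < 0x1p31 ?  (int) x
                                :  ((int) (x - 0x1p31)) ^ 0x80000000,
   performed element-wise with SSE compare/and/sub and a final xor.  */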
19223 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19224 Expects the 64-bit DImode to be supplied in a pair of integral
19225 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19226 -mfpmath=sse, !optimize_size only. */
19228 void
19229 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19231 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19232 rtx int_xmm, fp_xmm;
19233 rtx biases, exponents;
19234 rtx x;
19236 int_xmm = gen_reg_rtx (V4SImode);
19237 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19238 emit_insn (gen_movdi_to_sse (int_xmm, input));
19239 else if (TARGET_SSE_SPLIT_REGS)
19241 emit_clobber (int_xmm);
19242 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19244 else
19246 x = gen_reg_rtx (V2DImode);
19247 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19248 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19251 x = gen_rtx_CONST_VECTOR (V4SImode,
19252 gen_rtvec (4, GEN_INT (0x43300000UL),
19253 GEN_INT (0x45300000UL),
19254 const0_rtx, const0_rtx));
19255 exponents = validize_mem (force_const_mem (V4SImode, x));
19257 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19258 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19260 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19261 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19262 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19263 (0x1.0p84 + double(fp_value_hi_xmm)).
19264 Note these exponents differ by 32. */
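/* Put differently (informal sketch): with lo and hi the low and high
   32-bit halves of the input, the two doubles built above encode
   (0x1.0p52 + lo) and (0x1.0p84 + hi * 0x1.0p32); subtracting the two
   biases and summing the halves recovers lo + hi * 2^32, i.e. the
   original unsigned 64-bit value.  */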
19266 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19268 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19269 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19270 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19271 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19272 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19273 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19274 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19275 biases = validize_mem (force_const_mem (V2DFmode, biases));
19276 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19278 /* Add the upper and lower DFmode values together. */
19279 if (TARGET_SSE3)
19280 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19281 else
19283 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19284 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19285 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19288 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19291 /* Not used, but eases macroization of patterns. */
19292 void
19293 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19295 gcc_unreachable ();
19298 /* Convert an unsigned SImode value into a DFmode. Only currently used
19299 for SSE, but applicable anywhere. */
19301 void
19302 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19304 REAL_VALUE_TYPE TWO31r;
19305 rtx x, fp;
19307 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19308 NULL, 1, OPTAB_DIRECT);
19310 fp = gen_reg_rtx (DFmode);
19311 emit_insn (gen_floatsidf2 (fp, x));
19313 real_ldexp (&TWO31r, &dconst1, 31);
19314 x = const_double_from_real_value (TWO31r, DFmode);
19316 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19317 if (x != target)
19318 emit_move_insn (target, x);
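/* Informally: the unsigned input u is computed as
   (double) (int32_t) (u - 0x80000000) + 0x1.0p31; the 32-bit wraparound
   maps u into the signed range, and adding 2^31 back as a double
   restores the original value.  */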
19321 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19322 32-bit mode; otherwise we have a direct convert instruction. */
19324 void
19325 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19327 REAL_VALUE_TYPE TWO32r;
19328 rtx fp_lo, fp_hi, x;
19330 fp_lo = gen_reg_rtx (DFmode);
19331 fp_hi = gen_reg_rtx (DFmode);
19333 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19335 real_ldexp (&TWO32r, &dconst1, 32);
19336 x = const_double_from_real_value (TWO32r, DFmode);
19337 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19339 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19341 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19342 0, OPTAB_DIRECT);
19343 if (x != target)
19344 emit_move_insn (target, x);
19347 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19348 For x86_32, -mfpmath=sse, !optimize_size only. */
19349 void
19350 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19352 REAL_VALUE_TYPE ONE16r;
19353 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19355 real_ldexp (&ONE16r, &dconst1, 16);
19356 x = const_double_from_real_value (ONE16r, SFmode);
19357 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19358 NULL, 0, OPTAB_DIRECT);
19359 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19360 NULL, 0, OPTAB_DIRECT);
19361 fp_hi = gen_reg_rtx (SFmode);
19362 fp_lo = gen_reg_rtx (SFmode);
19363 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19364 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19365 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19366 0, OPTAB_DIRECT);
19367 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19368 0, OPTAB_DIRECT);
19369 if (!rtx_equal_p (target, fp_hi))
19370 emit_move_insn (target, fp_hi);
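/* Informally: u is split into 16-bit halves and recombined as
   (float) (u >> 16) * 0x1.0p16 + (float) (u & 0xffff); both halves
   convert exactly, so only the final addition rounds.  */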
19373 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19374 a vector of unsigned ints VAL to vector of floats TARGET. */
19376 void
19377 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19379 rtx tmp[8];
19380 REAL_VALUE_TYPE TWO16r;
19381 machine_mode intmode = GET_MODE (val);
19382 machine_mode fltmode = GET_MODE (target);
19383 rtx (*cvt) (rtx, rtx);
19385 if (intmode == V4SImode)
19386 cvt = gen_floatv4siv4sf2;
19387 else
19388 cvt = gen_floatv8siv8sf2;
19389 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19390 tmp[0] = force_reg (intmode, tmp[0]);
19391 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19392 OPTAB_DIRECT);
19393 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19394 NULL_RTX, 1, OPTAB_DIRECT);
19395 tmp[3] = gen_reg_rtx (fltmode);
19396 emit_insn (cvt (tmp[3], tmp[1]));
19397 tmp[4] = gen_reg_rtx (fltmode);
19398 emit_insn (cvt (tmp[4], tmp[2]));
19399 real_ldexp (&TWO16r, &dconst1, 16);
19400 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19401 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19402 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19403 OPTAB_DIRECT);
19404 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19405 OPTAB_DIRECT);
19406 if (tmp[7] != target)
19407 emit_move_insn (target, tmp[7]);
19410 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19411 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19412 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19413 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19416 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19418 REAL_VALUE_TYPE TWO31r;
19419 rtx two31r, tmp[4];
19420 machine_mode mode = GET_MODE (val);
19421 machine_mode scalarmode = GET_MODE_INNER (mode);
19422 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19423 rtx (*cmp) (rtx, rtx, rtx, rtx);
19424 int i;
19426 for (i = 0; i < 3; i++)
19427 tmp[i] = gen_reg_rtx (mode);
19428 real_ldexp (&TWO31r, &dconst1, 31);
19429 two31r = const_double_from_real_value (TWO31r, scalarmode);
19430 two31r = ix86_build_const_vector (mode, 1, two31r);
19431 two31r = force_reg (mode, two31r);
19432 switch (mode)
19434 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19435 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19436 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19437 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19438 default: gcc_unreachable ();
19440 tmp[3] = gen_rtx_LE (mode, two31r, val);
19441 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19442 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19443 0, OPTAB_DIRECT);
19444 if (intmode == V4SImode || TARGET_AVX2)
19445 *xorp = expand_simple_binop (intmode, ASHIFT,
19446 gen_lowpart (intmode, tmp[0]),
19447 GEN_INT (31), NULL_RTX, 0,
19448 OPTAB_DIRECT);
19449 else
19451 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19452 two31 = ix86_build_const_vector (intmode, 1, two31);
19453 *xorp = expand_simple_binop (intmode, AND,
19454 gen_lowpart (intmode, tmp[0]),
19455 two31, NULL_RTX, 0,
19456 OPTAB_DIRECT);
19458 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19459 0, OPTAB_DIRECT);
19462 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19463 then replicate the value for all elements of the vector
19464 register. */
19467 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19469 int i, n_elt;
19470 rtvec v;
19471 machine_mode scalar_mode;
19473 switch (mode)
19475 case V64QImode:
19476 case V32QImode:
19477 case V16QImode:
19478 case V32HImode:
19479 case V16HImode:
19480 case V8HImode:
19481 case V16SImode:
19482 case V8SImode:
19483 case V4SImode:
19484 case V8DImode:
19485 case V4DImode:
19486 case V2DImode:
19487 gcc_assert (vect);
19488 case V16SFmode:
19489 case V8SFmode:
19490 case V4SFmode:
19491 case V8DFmode:
19492 case V4DFmode:
19493 case V2DFmode:
19494 n_elt = GET_MODE_NUNITS (mode);
19495 v = rtvec_alloc (n_elt);
19496 scalar_mode = GET_MODE_INNER (mode);
19498 RTVEC_ELT (v, 0) = value;
19500 for (i = 1; i < n_elt; ++i)
19501 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19503 return gen_rtx_CONST_VECTOR (mode, v);
19505 default:
19506 gcc_unreachable ();
19510 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19511 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19512 for an SSE register. If VECT is true, then replicate the mask for
19513 all elements of the vector register. If INVERT is true, then create
19514 a mask excluding the sign bit. */
19517 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19519 machine_mode vec_mode, imode;
19520 HOST_WIDE_INT hi, lo;
19521 int shift = 63;
19522 rtx v;
19523 rtx mask;
19525 /* Find the sign bit, sign extended to 2*HWI. */
19526 switch (mode)
19528 case V16SImode:
19529 case V16SFmode:
19530 case V8SImode:
19531 case V4SImode:
19532 case V8SFmode:
19533 case V4SFmode:
19534 vec_mode = mode;
19535 mode = GET_MODE_INNER (mode);
19536 imode = SImode;
19537 lo = 0x80000000, hi = lo < 0;
19538 break;
19540 case V8DImode:
19541 case V4DImode:
19542 case V2DImode:
19543 case V8DFmode:
19544 case V4DFmode:
19545 case V2DFmode:
19546 vec_mode = mode;
19547 mode = GET_MODE_INNER (mode);
19548 imode = DImode;
19549 if (HOST_BITS_PER_WIDE_INT >= 64)
19550 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19551 else
19552 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19553 break;
19555 case TImode:
19556 case TFmode:
19557 vec_mode = VOIDmode;
19558 if (HOST_BITS_PER_WIDE_INT >= 64)
19560 imode = TImode;
19561 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19563 else
19565 rtvec vec;
19567 imode = DImode;
19568 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19570 if (invert)
19572 lo = ~lo, hi = ~hi;
19573 v = constm1_rtx;
19575 else
19576 v = const0_rtx;
19578 mask = immed_double_const (lo, hi, imode);
19580 vec = gen_rtvec (2, v, mask);
19581 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19582 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19584 return v;
19586 break;
19588 default:
19589 gcc_unreachable ();
19592 if (invert)
19593 lo = ~lo, hi = ~hi;
19595 /* Force this value into the low part of a fp vector constant. */
19596 mask = immed_double_const (lo, hi, imode);
19597 mask = gen_lowpart (mode, mask);
19599 if (vec_mode == VOIDmode)
19600 return force_reg (mode, mask);
19602 v = ix86_build_const_vector (vec_mode, vect, mask);
19603 return force_reg (vec_mode, v);
19606 /* Generate code for floating point ABS or NEG. */
19608 void
19609 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19610 rtx operands[])
19612 rtx mask, set, dst, src;
19613 bool use_sse = false;
19614 bool vector_mode = VECTOR_MODE_P (mode);
19615 machine_mode vmode = mode;
19617 if (vector_mode)
19618 use_sse = true;
19619 else if (mode == TFmode)
19620 use_sse = true;
19621 else if (TARGET_SSE_MATH)
19623 use_sse = SSE_FLOAT_MODE_P (mode);
19624 if (mode == SFmode)
19625 vmode = V4SFmode;
19626 else if (mode == DFmode)
19627 vmode = V2DFmode;
19630 /* NEG and ABS performed with SSE use bitwise mask operations.
19631 Create the appropriate mask now. */
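/* (Informal note: the mask feeds the absneg patterns -- NEG is expected
   to XOR the operand with a mask holding only the sign bits, while ABS
   ANDs with the inverted mask to clear them; hence invert == (code == ABS)
   in the call below.)  */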
19632 if (use_sse)
19633 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19634 else
19635 mask = NULL_RTX;
19637 dst = operands[0];
19638 src = operands[1];
19640 set = gen_rtx_fmt_e (code, mode, src);
19641 set = gen_rtx_SET (VOIDmode, dst, set);
19643 if (mask)
19645 rtx use, clob;
19646 rtvec par;
19648 use = gen_rtx_USE (VOIDmode, mask);
19649 if (vector_mode)
19650 par = gen_rtvec (2, set, use);
19651 else
19653 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19654 par = gen_rtvec (3, set, use, clob);
19656 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19658 else
19659 emit_insn (set);
19662 /* Expand a copysign operation. Special case operand 0 being a constant. */
19664 void
19665 ix86_expand_copysign (rtx operands[])
19667 machine_mode mode, vmode;
19668 rtx dest, op0, op1, mask, nmask;
19670 dest = operands[0];
19671 op0 = operands[1];
19672 op1 = operands[2];
19674 mode = GET_MODE (dest);
19676 if (mode == SFmode)
19677 vmode = V4SFmode;
19678 else if (mode == DFmode)
19679 vmode = V2DFmode;
19680 else
19681 vmode = mode;
19683 if (GET_CODE (op0) == CONST_DOUBLE)
19685 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19687 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19688 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19690 if (mode == SFmode || mode == DFmode)
19692 if (op0 == CONST0_RTX (mode))
19693 op0 = CONST0_RTX (vmode);
19694 else
19696 rtx v = ix86_build_const_vector (vmode, false, op0);
19698 op0 = force_reg (vmode, v);
19701 else if (op0 != CONST0_RTX (mode))
19702 op0 = force_reg (mode, op0);
19704 mask = ix86_build_signbit_mask (vmode, 0, 0);
19706 if (mode == SFmode)
19707 copysign_insn = gen_copysignsf3_const;
19708 else if (mode == DFmode)
19709 copysign_insn = gen_copysigndf3_const;
19710 else
19711 copysign_insn = gen_copysigntf3_const;
19713 emit_insn (copysign_insn (dest, op0, op1, mask));
19715 else
19717 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19719 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19720 mask = ix86_build_signbit_mask (vmode, 0, 0);
19722 if (mode == SFmode)
19723 copysign_insn = gen_copysignsf3_var;
19724 else if (mode == DFmode)
19725 copysign_insn = gen_copysigndf3_var;
19726 else
19727 copysign_insn = gen_copysigntf3_var;
19729 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19733 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19734 be a constant, and so has already been expanded into a vector constant. */
19736 void
19737 ix86_split_copysign_const (rtx operands[])
19739 machine_mode mode, vmode;
19740 rtx dest, op0, mask, x;
19742 dest = operands[0];
19743 op0 = operands[1];
19744 mask = operands[3];
19746 mode = GET_MODE (dest);
19747 vmode = GET_MODE (mask);
19749 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19750 x = gen_rtx_AND (vmode, dest, mask);
19751 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19753 if (op0 != CONST0_RTX (vmode))
19755 x = gen_rtx_IOR (vmode, dest, op0);
19756 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19760 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19761 so we have to do two masks. */
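/* Informally: the split computes copysign (op0, op1) as
   (op0 & NMASK) | (op1 & MASK), where MASK holds only the sign bit and
   NMASK is its complement.  */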
19763 void
19764 ix86_split_copysign_var (rtx operands[])
19766 machine_mode mode, vmode;
19767 rtx dest, scratch, op0, op1, mask, nmask, x;
19769 dest = operands[0];
19770 scratch = operands[1];
19771 op0 = operands[2];
19772 op1 = operands[3];
19773 nmask = operands[4];
19774 mask = operands[5];
19776 mode = GET_MODE (dest);
19777 vmode = GET_MODE (mask);
19779 if (rtx_equal_p (op0, op1))
19781 /* Shouldn't happen often (it's useless, obviously), but when it does
19782 we'd generate incorrect code if we continue below. */
19783 emit_move_insn (dest, op0);
19784 return;
19787 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19789 gcc_assert (REGNO (op1) == REGNO (scratch));
19791 x = gen_rtx_AND (vmode, scratch, mask);
19792 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19794 dest = mask;
19795 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19796 x = gen_rtx_NOT (vmode, dest);
19797 x = gen_rtx_AND (vmode, x, op0);
19798 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19800 else
19802 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19804 x = gen_rtx_AND (vmode, scratch, mask);
19806 else /* alternative 2,4 */
19808 gcc_assert (REGNO (mask) == REGNO (scratch));
19809 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19810 x = gen_rtx_AND (vmode, scratch, op1);
19812 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19814 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19816 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19817 x = gen_rtx_AND (vmode, dest, nmask);
19819 else /* alternative 3,4 */
19821 gcc_assert (REGNO (nmask) == REGNO (dest));
19822 dest = nmask;
19823 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19824 x = gen_rtx_AND (vmode, dest, op0);
19826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19829 x = gen_rtx_IOR (vmode, dest, scratch);
19830 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19833 /* Return TRUE or FALSE depending on whether the first SET in INSN
19834 has source and destination with matching CC modes, and whether the
19835 CC mode is at least as constrained as REQ_MODE. */
19837 bool
19838 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19840 rtx set;
19841 machine_mode set_mode;
19843 set = PATTERN (insn);
19844 if (GET_CODE (set) == PARALLEL)
19845 set = XVECEXP (set, 0, 0);
19846 gcc_assert (GET_CODE (set) == SET);
19847 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19849 set_mode = GET_MODE (SET_DEST (set));
19850 switch (set_mode)
19852 case CCNOmode:
19853 if (req_mode != CCNOmode
19854 && (req_mode != CCmode
19855 || XEXP (SET_SRC (set), 1) != const0_rtx))
19856 return false;
19857 break;
19858 case CCmode:
19859 if (req_mode == CCGCmode)
19860 return false;
19861 /* FALLTHRU */
19862 case CCGCmode:
19863 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19864 return false;
19865 /* FALLTHRU */
19866 case CCGOCmode:
19867 if (req_mode == CCZmode)
19868 return false;
19869 /* FALLTHRU */
19870 case CCZmode:
19871 break;
19873 case CCAmode:
19874 case CCCmode:
19875 case CCOmode:
19876 case CCSmode:
19877 if (set_mode != req_mode)
19878 return false;
19879 break;
19881 default:
19882 gcc_unreachable ();
19885 return GET_MODE (SET_SRC (set)) == set_mode;
19888 /* Generate insn patterns to do an integer compare of OPERANDS. */
19890 static rtx
19891 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19893 machine_mode cmpmode;
19894 rtx tmp, flags;
19896 cmpmode = SELECT_CC_MODE (code, op0, op1);
19897 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19899 /* This is very simple, but making the interface the same as in the
19900 FP case makes the rest of the code easier. */
19901 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19902 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19904 /* Return the test that should be put into the flags user, i.e.
19905 the bcc, scc, or cmov instruction. */
19906 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19909 /* Figure out whether to use ordered or unordered fp comparisons.
19910 Return the appropriate mode to use. */
19912 machine_mode
19913 ix86_fp_compare_mode (enum rtx_code)
19915 /* ??? In order to make all comparisons reversible, we do all comparisons
19916 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19917 all forms of trapping and nontrapping comparisons, we can make inequality
19918 comparisons trapping again, since it results in better code when using
19919 FCOM based compares. */
19920 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19923 machine_mode
19924 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19926 machine_mode mode = GET_MODE (op0);
19928 if (SCALAR_FLOAT_MODE_P (mode))
19930 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19931 return ix86_fp_compare_mode (code);
19934 switch (code)
19936 /* Only zero flag is needed. */
19937 case EQ: /* ZF=0 */
19938 case NE: /* ZF!=0 */
19939 return CCZmode;
19940 /* Codes needing carry flag. */
19941 case GEU: /* CF=0 */
19942 case LTU: /* CF=1 */
19943 /* Detect overflow checks. They need just the carry flag. */
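/* (E.g. the unsigned test "a + b < a" is true exactly when the
   addition wrapped, so CF alone decides it.)  */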
19944 if (GET_CODE (op0) == PLUS
19945 && rtx_equal_p (op1, XEXP (op0, 0)))
19946 return CCCmode;
19947 else
19948 return CCmode;
19949 case GTU: /* CF=0 & ZF=0 */
19950 case LEU: /* CF=1 | ZF=1 */
19951 return CCmode;
19952 /* Codes possibly doable only with sign flag when
19953 comparing against zero. */
19954 case GE: /* SF=OF or SF=0 */
19955 case LT: /* SF<>OF or SF=1 */
19956 if (op1 == const0_rtx)
19957 return CCGOCmode;
19958 else
19959 /* For other cases the carry flag is not required. */
19960 return CCGCmode;
19961 /* Codes doable only with the sign flag when comparing
19962 against zero, but we miss the jump instruction for it
19963 so we need to use relational tests against overflow,
19964 which thus needs to be zero. */
19965 case GT: /* ZF=0 & SF=OF */
19966 case LE: /* ZF=1 | SF<>OF */
19967 if (op1 == const0_rtx)
19968 return CCNOmode;
19969 else
19970 return CCGCmode;
19971 /* The strcmp pattern does (use flags) and combine may ask us for the
19972 proper mode. */
19973 case USE:
19974 return CCmode;
19975 default:
19976 gcc_unreachable ();
19980 /* Return the fixed registers used for condition codes. */
19982 static bool
19983 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19985 *p1 = FLAGS_REG;
19986 *p2 = FPSR_REG;
19987 return true;
19990 /* If two condition code modes are compatible, return a condition code
19991 mode which is compatible with both. Otherwise, return
19992 VOIDmode. */
19994 static machine_mode
19995 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
19997 if (m1 == m2)
19998 return m1;
20000 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20001 return VOIDmode;
20003 if ((m1 == CCGCmode && m2 == CCGOCmode)
20004 || (m1 == CCGOCmode && m2 == CCGCmode))
20005 return CCGCmode;
20007 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20008 return m2;
20009 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20010 return m1;
20012 switch (m1)
20014 default:
20015 gcc_unreachable ();
20017 case CCmode:
20018 case CCGCmode:
20019 case CCGOCmode:
20020 case CCNOmode:
20021 case CCAmode:
20022 case CCCmode:
20023 case CCOmode:
20024 case CCSmode:
20025 case CCZmode:
20026 switch (m2)
20028 default:
20029 return VOIDmode;
20031 case CCmode:
20032 case CCGCmode:
20033 case CCGOCmode:
20034 case CCNOmode:
20035 case CCAmode:
20036 case CCCmode:
20037 case CCOmode:
20038 case CCSmode:
20039 case CCZmode:
20040 return CCmode;
20043 case CCFPmode:
20044 case CCFPUmode:
20045 /* These are only compatible with themselves, which we already
20046 checked above. */
20047 return VOIDmode;
20052 /* Return a comparison we can do that is equivalent to
20053 swap_condition (code), apart possibly from orderedness.
20054 But never change orderedness if TARGET_IEEE_FP, returning
20055 UNKNOWN in that case if necessary. */
20057 static enum rtx_code
20058 ix86_fp_swap_condition (enum rtx_code code)
20060 switch (code)
20062 case GT: /* GTU - CF=0 & ZF=0 */
20063 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20064 case GE: /* GEU - CF=0 */
20065 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20066 case UNLT: /* LTU - CF=1 */
20067 return TARGET_IEEE_FP ? UNKNOWN : GT;
20068 case UNLE: /* LEU - CF=1 | ZF=1 */
20069 return TARGET_IEEE_FP ? UNKNOWN : GE;
20070 default:
20071 return swap_condition (code);
20075 /* Return the cost of comparison CODE using the best strategy for performance.
20076 All the following functions use the number of instructions as a cost metric.
20077 In the future this should be tweaked to compute bytes for optimize_size and
20078 to take into account the performance of various instructions on various CPUs. */
20080 static int
20081 ix86_fp_comparison_cost (enum rtx_code code)
20083 int arith_cost;
20085 /* The cost of code using bit-twiddling on %ah. */
20086 switch (code)
20088 case UNLE:
20089 case UNLT:
20090 case LTGT:
20091 case GT:
20092 case GE:
20093 case UNORDERED:
20094 case ORDERED:
20095 case UNEQ:
20096 arith_cost = 4;
20097 break;
20098 case LT:
20099 case NE:
20100 case EQ:
20101 case UNGE:
20102 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20103 break;
20104 case LE:
20105 case UNGT:
20106 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20107 break;
20108 default:
20109 gcc_unreachable ();
20112 switch (ix86_fp_comparison_strategy (code))
20114 case IX86_FPCMP_COMI:
20115 return arith_cost > 4 ? 3 : 2;
20116 case IX86_FPCMP_SAHF:
20117 return arith_cost > 4 ? 4 : 3;
20118 default:
20119 return arith_cost;
20123 /* Return the strategy to use for floating-point comparisons. We assume that
20124 fcomi is always preferable where available, since that is also true when
20125 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20127 enum ix86_fpcmp_strategy
20128 ix86_fp_comparison_strategy (enum rtx_code)
20130 /* Do fcomi/sahf based test when profitable. */
20132 if (TARGET_CMOVE)
20133 return IX86_FPCMP_COMI;
20135 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20136 return IX86_FPCMP_SAHF;
20138 return IX86_FPCMP_ARITH;
20141 /* Swap, force into registers, or otherwise massage the two operands
20142 to a fp comparison. The operands are updated in place; the new
20143 comparison code is returned. */
20145 static enum rtx_code
20146 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20148 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20149 rtx op0 = *pop0, op1 = *pop1;
20150 machine_mode op_mode = GET_MODE (op0);
20151 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20153 /* All of the unordered compare instructions only work on registers.
20154 The same is true of the fcomi compare instructions. The XFmode
20155 compare instructions require registers except when comparing
20156 against zero or when converting operand 1 from fixed point to
20157 floating point. */
20159 if (!is_sse
20160 && (fpcmp_mode == CCFPUmode
20161 || (op_mode == XFmode
20162 && ! (standard_80387_constant_p (op0) == 1
20163 || standard_80387_constant_p (op1) == 1)
20164 && GET_CODE (op1) != FLOAT)
20165 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20167 op0 = force_reg (op_mode, op0);
20168 op1 = force_reg (op_mode, op1);
20170 else
20172 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20173 things around if they appear profitable, otherwise force op0
20174 into a register. */
20176 if (standard_80387_constant_p (op0) == 0
20177 || (MEM_P (op0)
20178 && ! (standard_80387_constant_p (op1) == 0
20179 || MEM_P (op1))))
20181 enum rtx_code new_code = ix86_fp_swap_condition (code);
20182 if (new_code != UNKNOWN)
20184 std::swap (op0, op1);
20185 code = new_code;
20189 if (!REG_P (op0))
20190 op0 = force_reg (op_mode, op0);
20192 if (CONSTANT_P (op1))
20194 int tmp = standard_80387_constant_p (op1);
20195 if (tmp == 0)
20196 op1 = validize_mem (force_const_mem (op_mode, op1));
20197 else if (tmp == 1)
20199 if (TARGET_CMOVE)
20200 op1 = force_reg (op_mode, op1);
20202 else
20203 op1 = force_reg (op_mode, op1);
20207 /* Try to rearrange the comparison to make it cheaper. */
20208 if (ix86_fp_comparison_cost (code)
20209 > ix86_fp_comparison_cost (swap_condition (code))
20210 && (REG_P (op1) || can_create_pseudo_p ()))
20212 std::swap (op0, op1);
20213 code = swap_condition (code);
20214 if (!REG_P (op0))
20215 op0 = force_reg (op_mode, op0);
20218 *pop0 = op0;
20219 *pop1 = op1;
20220 return code;
20223 /* Convert a comparison code we use to represent an FP comparison to the
20224 integer code that will result in a proper branch. Return UNKNOWN if no
20225 such code is available. */
20227 enum rtx_code
20228 ix86_fp_compare_code_to_integer (enum rtx_code code)
20230 switch (code)
20232 case GT:
20233 return GTU;
20234 case GE:
20235 return GEU;
20236 case ORDERED:
20237 case UNORDERED:
20238 return code;
20239 break;
20240 case UNEQ:
20241 return EQ;
20242 break;
20243 case UNLT:
20244 return LTU;
20245 break;
20246 case UNLE:
20247 return LEU;
20248 break;
20249 case LTGT:
20250 return NE;
20251 break;
20252 default:
20253 return UNKNOWN;
20257 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20259 static rtx
20260 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20262 machine_mode fpcmp_mode, intcmp_mode;
20263 rtx tmp, tmp2;
20265 fpcmp_mode = ix86_fp_compare_mode (code);
20266 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20268 /* Do fcomi/sahf based test when profitable. */
20269 switch (ix86_fp_comparison_strategy (code))
20271 case IX86_FPCMP_COMI:
20272 intcmp_mode = fpcmp_mode;
20273 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20274 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20275 tmp);
20276 emit_insn (tmp);
20277 break;
20279 case IX86_FPCMP_SAHF:
20280 intcmp_mode = fpcmp_mode;
20281 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20282 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20283 tmp);
20285 if (!scratch)
20286 scratch = gen_reg_rtx (HImode);
20287 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20288 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20289 break;
20291 case IX86_FPCMP_ARITH:
20292 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20293 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20294 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20295 if (!scratch)
20296 scratch = gen_reg_rtx (HImode);
20297 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20299 /* In the unordered case, we have to check C2 for NaN's, which
20300 doesn't happen to work out to anything nice combination-wise.
20301 So do some bit twiddling on the value we've got in AH to come
20302 up with an appropriate set of condition codes. */
20304 intcmp_mode = CCNOmode;
20305 switch (code)
20307 case GT:
20308 case UNGT:
20309 if (code == GT || !TARGET_IEEE_FP)
20311 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20312 code = EQ;
20314 else
20316 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20317 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20318 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20319 intcmp_mode = CCmode;
20320 code = GEU;
20322 break;
20323 case LT:
20324 case UNLT:
20325 if (code == LT && TARGET_IEEE_FP)
20327 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20328 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20329 intcmp_mode = CCmode;
20330 code = EQ;
20332 else
20334 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20335 code = NE;
20337 break;
20338 case GE:
20339 case UNGE:
20340 if (code == GE || !TARGET_IEEE_FP)
20342 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20343 code = EQ;
20345 else
20347 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20348 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20349 code = NE;
20351 break;
20352 case LE:
20353 case UNLE:
20354 if (code == LE && TARGET_IEEE_FP)
20356 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20357 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20358 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20359 intcmp_mode = CCmode;
20360 code = LTU;
20362 else
20364 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20365 code = NE;
20367 break;
20368 case EQ:
20369 case UNEQ:
20370 if (code == EQ && TARGET_IEEE_FP)
20372 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20373 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20374 intcmp_mode = CCmode;
20375 code = EQ;
20377 else
20379 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20380 code = NE;
20382 break;
20383 case NE:
20384 case LTGT:
20385 if (code == NE && TARGET_IEEE_FP)
20387 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20388 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20389 GEN_INT (0x40)));
20390 code = NE;
20392 else
20394 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20395 code = EQ;
20397 break;
20399 case UNORDERED:
20400 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20401 code = NE;
20402 break;
20403 case ORDERED:
20404 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20405 code = EQ;
20406 break;
20408 default:
20409 gcc_unreachable ();
20411 break;
20413 default:
20414 gcc_unreachable ();
20417 /* Return the test that should be put into the flags user, i.e.
20418 the bcc, scc, or cmov instruction. */
20419 return gen_rtx_fmt_ee (code, VOIDmode,
20420 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20421 const0_rtx);
20424 static rtx
20425 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20427 rtx ret;
20429 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20430 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20432 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20434 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20435 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20437 else
20438 ret = ix86_expand_int_compare (code, op0, op1);
20440 return ret;
20443 void
20444 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20446 machine_mode mode = GET_MODE (op0);
20447 rtx tmp;
20449 switch (mode)
20451 case SFmode:
20452 case DFmode:
20453 case XFmode:
20454 case QImode:
20455 case HImode:
20456 case SImode:
20457 simple:
20458 tmp = ix86_expand_compare (code, op0, op1);
20459 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20460 gen_rtx_LABEL_REF (VOIDmode, label),
20461 pc_rtx);
20462 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20463 return;
20465 case DImode:
20466 if (TARGET_64BIT)
20467 goto simple;
20468 case TImode:
20469 /* Expand DImode branch into multiple compare+branch. */
20471 rtx lo[2], hi[2];
20472 rtx_code_label *label2;
20473 enum rtx_code code1, code2, code3;
20474 machine_mode submode;
20476 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20478 std::swap (op0, op1);
20479 code = swap_condition (code);
20482 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20483 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20485 submode = mode == DImode ? SImode : DImode;
20487 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20488 avoid two branches. This costs one extra insn, so disable when
20489 optimizing for size. */
20491 if ((code == EQ || code == NE)
20492 && (!optimize_insn_for_size_p ()
20493 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20495 rtx xor0, xor1;
20497 xor1 = hi[0];
20498 if (hi[1] != const0_rtx)
20499 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20500 NULL_RTX, 0, OPTAB_WIDEN);
20502 xor0 = lo[0];
20503 if (lo[1] != const0_rtx)
20504 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20505 NULL_RTX, 0, OPTAB_WIDEN);
20507 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20508 NULL_RTX, 0, OPTAB_WIDEN);
20510 ix86_expand_branch (code, tmp, const0_rtx, label);
20511 return;
20514 /* Otherwise, if we are doing a less-than or greater-or-equal-than
20515 comparison, op1 is a constant and the low word is zero, then we can
20516 just examine the high word. Similarly for a low word of -1 and
20517 less-or-equal-than or greater-than. */
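/* (For instance, the unsigned 64-bit test "x < 0x300000000" on ia32
   has a zero low word, so it reduces to comparing the high word
   against 3.)  */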
20519 if (CONST_INT_P (hi[1]))
20520 switch (code)
20522 case LT: case LTU: case GE: case GEU:
20523 if (lo[1] == const0_rtx)
20525 ix86_expand_branch (code, hi[0], hi[1], label);
20526 return;
20528 break;
20529 case LE: case LEU: case GT: case GTU:
20530 if (lo[1] == constm1_rtx)
20532 ix86_expand_branch (code, hi[0], hi[1], label);
20533 return;
20535 break;
20536 default:
20537 break;
20540 /* Otherwise, we need two or three jumps. */
20542 label2 = gen_label_rtx ();
20544 code1 = code;
20545 code2 = swap_condition (code);
20546 code3 = unsigned_condition (code);
20548 switch (code)
20550 case LT: case GT: case LTU: case GTU:
20551 break;
20553 case LE: code1 = LT; code2 = GT; break;
20554 case GE: code1 = GT; code2 = LT; break;
20555 case LEU: code1 = LTU; code2 = GTU; break;
20556 case GEU: code1 = GTU; code2 = LTU; break;
20558 case EQ: code1 = UNKNOWN; code2 = NE; break;
20559 case NE: code2 = UNKNOWN; break;
20561 default:
20562 gcc_unreachable ();
20566 * a < b =>
20567 * if (hi(a) < hi(b)) goto true;
20568 * if (hi(a) > hi(b)) goto false;
20569 * if (lo(a) < lo(b)) goto true;
20570 * false:
20573 if (code1 != UNKNOWN)
20574 ix86_expand_branch (code1, hi[0], hi[1], label);
20575 if (code2 != UNKNOWN)
20576 ix86_expand_branch (code2, hi[0], hi[1], label2);
20578 ix86_expand_branch (code3, lo[0], lo[1], label);
20580 if (code2 != UNKNOWN)
20581 emit_label (label2);
20582 return;
20585 default:
20586 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20587 goto simple;
20591 /* Split a branch based on a floating point condition. */
20592 void
20593 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20594 rtx target1, rtx target2, rtx tmp)
20596 rtx condition;
20597 rtx i;
20599 if (target2 != pc_rtx)
20601 std::swap (target1, target2);
20602 code = reverse_condition_maybe_unordered (code);
20605 condition = ix86_expand_fp_compare (code, op1, op2,
20606 tmp);
20608 i = emit_jump_insn (gen_rtx_SET
20609 (VOIDmode, pc_rtx,
20610 gen_rtx_IF_THEN_ELSE (VOIDmode,
20611 condition, target1, target2)));
20612 if (split_branch_probability >= 0)
20613 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20616 void
20617 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20619 rtx ret;
20621 gcc_assert (GET_MODE (dest) == QImode);
20623 ret = ix86_expand_compare (code, op0, op1);
20624 PUT_MODE (ret, QImode);
20625 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20628 /* Expand a comparison setting or clearing the carry flag. Return true when
20629 successful and set *POP for the operation. */
20630 static bool
20631 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20633 machine_mode mode =
20634 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20636 /* Do not handle double-mode compares that go through the special path. */
20637 if (mode == (TARGET_64BIT ? TImode : DImode))
20638 return false;
20640 if (SCALAR_FLOAT_MODE_P (mode))
20642 rtx compare_op;
20643 rtx_insn *compare_seq;
20645 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20647 /* Shortcut: following common codes never translate
20648 into carry flag compares. */
20649 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20650 || code == ORDERED || code == UNORDERED)
20651 return false;
20653 /* These comparisons require zero flag; swap operands so they won't. */
20654 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20655 && !TARGET_IEEE_FP)
20657 std::swap (op0, op1);
20658 code = swap_condition (code);
20661 /* Try to expand the comparison and verify that we end up with a
20662 carry flag based comparison. This fails only when we decide to
20663 expand the comparison using arithmetic, which is not a very
20664 common scenario. */
20665 start_sequence ();
20666 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20667 compare_seq = get_insns ();
20668 end_sequence ();
20670 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20671 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20672 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20673 else
20674 code = GET_CODE (compare_op);
20676 if (code != LTU && code != GEU)
20677 return false;
20679 emit_insn (compare_seq);
20680 *pop = compare_op;
20681 return true;
20684 if (!INTEGRAL_MODE_P (mode))
20685 return false;
20687 switch (code)
20689 case LTU:
20690 case GEU:
20691 break;
20693 /* Convert a==0 into (unsigned)a<1. */
20694 case EQ:
20695 case NE:
20696 if (op1 != const0_rtx)
20697 return false;
20698 op1 = const1_rtx;
20699 code = (code == EQ ? LTU : GEU);
20700 break;
20702 /* Convert a>b into b<a or a>=b+1. */
20703 case GTU:
20704 case LEU:
20705 if (CONST_INT_P (op1))
20707 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20708 /* Bail out on overflow. We still can swap operands but that
20709 would force loading of the constant into register. */
20710 if (op1 == const0_rtx
20711 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20712 return false;
20713 code = (code == GTU ? GEU : LTU);
20715 else
20717 std::swap (op0, op1);
20718 code = (code == GTU ? LTU : GEU);
20720 break;
20722 /* Convert a>=0 into (unsigned)a<0x80000000. */
20723 case LT:
20724 case GE:
20725 if (mode == DImode || op1 != const0_rtx)
20726 return false;
20727 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20728 code = (code == LT ? GEU : LTU);
20729 break;
20730 case LE:
20731 case GT:
20732 if (mode == DImode || op1 != constm1_rtx)
20733 return false;
20734 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20735 code = (code == LE ? GEU : LTU);
20736 break;
20738 default:
20739 return false;
20741 /* Swapping operands may cause constant to appear as first operand. */
20742 if (!nonimmediate_operand (op0, VOIDmode))
20744 if (!can_create_pseudo_p ())
20745 return false;
20746 op0 = force_reg (mode, op0);
20748 *pop = ix86_expand_compare (code, op0, op1);
20749 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20750 return true;
20753 bool
20754 ix86_expand_int_movcc (rtx operands[])
20756 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20757 rtx_insn *compare_seq;
20758 rtx compare_op;
20759 machine_mode mode = GET_MODE (operands[0]);
20760 bool sign_bit_compare_p = false;
20761 rtx op0 = XEXP (operands[1], 0);
20762 rtx op1 = XEXP (operands[1], 1);
20764 if (GET_MODE (op0) == TImode
20765 || (GET_MODE (op0) == DImode
20766 && !TARGET_64BIT))
20767 return false;
20769 start_sequence ();
20770 compare_op = ix86_expand_compare (code, op0, op1);
20771 compare_seq = get_insns ();
20772 end_sequence ();
20774 compare_code = GET_CODE (compare_op);
20776 if ((op1 == const0_rtx && (code == GE || code == LT))
20777 || (op1 == constm1_rtx && (code == GT || code == LE)))
20778 sign_bit_compare_p = true;
20780 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20781 HImode insns, we'd be swallowed in word prefix ops. */
20783 if ((mode != HImode || TARGET_FAST_PREFIX)
20784 && (mode != (TARGET_64BIT ? TImode : DImode))
20785 && CONST_INT_P (operands[2])
20786 && CONST_INT_P (operands[3]))
20788 rtx out = operands[0];
20789 HOST_WIDE_INT ct = INTVAL (operands[2]);
20790 HOST_WIDE_INT cf = INTVAL (operands[3]);
20791 HOST_WIDE_INT diff;
20793 diff = ct - cf;
20794 /* Sign bit compares are better done using shifts than by using
20795 sbb. */
20796 if (sign_bit_compare_p
20797 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20799 /* Detect overlap between destination and compare sources. */
20800 rtx tmp = out;
20802 if (!sign_bit_compare_p)
20804 rtx flags;
20805 bool fpcmp = false;
20807 compare_code = GET_CODE (compare_op);
20809 flags = XEXP (compare_op, 0);
20811 if (GET_MODE (flags) == CCFPmode
20812 || GET_MODE (flags) == CCFPUmode)
20814 fpcmp = true;
20815 compare_code
20816 = ix86_fp_compare_code_to_integer (compare_code);
20819 /* To simplify the rest of the code, restrict to the GEU case. */
20820 if (compare_code == LTU)
20822 std::swap (ct, cf);
20823 compare_code = reverse_condition (compare_code);
20824 code = reverse_condition (code);
20826 else
20828 if (fpcmp)
20829 PUT_CODE (compare_op,
20830 reverse_condition_maybe_unordered
20831 (GET_CODE (compare_op)));
20832 else
20833 PUT_CODE (compare_op,
20834 reverse_condition (GET_CODE (compare_op)));
20836 diff = ct - cf;
20838 if (reg_overlap_mentioned_p (out, op0)
20839 || reg_overlap_mentioned_p (out, op1))
20840 tmp = gen_reg_rtx (mode);
20842 if (mode == DImode)
20843 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20844 else
20845 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20846 flags, compare_op));
20848 else
20850 if (code == GT || code == GE)
20851 code = reverse_condition (code);
20852 else
20854 std::swap (ct, cf);
20855 diff = ct - cf;
20857 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
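/* At this point TMP is an all-ones mask (-1) for one arm of the conditional
   and 0 for the other, produced either by sbb from the carry flag or by an
   arithmetic shift of the sign bit.  The cases below turn that mask into
   ct/cf using at most NOT/AND/OR/ADD operations.  */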
20860 if (diff == 1)
20863 * cmpl op0,op1
20864 * sbbl dest,dest
20865 * [addl dest, ct]
20867 * Size 5 - 8.
20869 if (ct)
20870 tmp = expand_simple_binop (mode, PLUS,
20871 tmp, GEN_INT (ct),
20872 copy_rtx (tmp), 1, OPTAB_DIRECT);
20874 else if (cf == -1)
20877 * cmpl op0,op1
20878 * sbbl dest,dest
20879 * orl $ct, dest
20881 * Size 8.
20883 tmp = expand_simple_binop (mode, IOR,
20884 tmp, GEN_INT (ct),
20885 copy_rtx (tmp), 1, OPTAB_DIRECT);
20887 else if (diff == -1 && ct)
20890 * cmpl op0,op1
20891 * sbbl dest,dest
20892 * notl dest
20893 * [addl dest, cf]
20895 * Size 8 - 11.
20897 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20898 if (cf)
20899 tmp = expand_simple_binop (mode, PLUS,
20900 copy_rtx (tmp), GEN_INT (cf),
20901 copy_rtx (tmp), 1, OPTAB_DIRECT);
20903 else
20906 * cmpl op0,op1
20907 * sbbl dest,dest
20908 * [notl dest]
20909 * andl cf - ct, dest
20910 * [addl dest, ct]
20912 * Size 8 - 11.
20915 if (cf == 0)
20917 cf = ct;
20918 ct = 0;
20919 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20922 tmp = expand_simple_binop (mode, AND,
20923 copy_rtx (tmp),
20924 gen_int_mode (cf - ct, mode),
20925 copy_rtx (tmp), 1, OPTAB_DIRECT);
20926 if (ct)
20927 tmp = expand_simple_binop (mode, PLUS,
20928 copy_rtx (tmp), GEN_INT (ct),
20929 copy_rtx (tmp), 1, OPTAB_DIRECT);
20932 if (!rtx_equal_p (tmp, out))
20933 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20935 return true;
20938 if (diff < 0)
20940 machine_mode cmp_mode = GET_MODE (op0);
20941 enum rtx_code new_code;
20943 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20945 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20947 /* We may be reversing an unordered compare to a normal compare, which
20948 is not valid in general (we may convert a non-trapping condition
20949 to a trapping one); however, on i386 we currently emit all
20950 comparisons unordered. */
20951 new_code = reverse_condition_maybe_unordered (code);
20953 else
20954 new_code = ix86_reverse_condition (code, cmp_mode);
20955 if (new_code != UNKNOWN)
20957 std::swap (ct, cf);
20958 diff = -diff;
20959 code = new_code;
20963 compare_code = UNKNOWN;
20964 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20965 && CONST_INT_P (op1))
20967 if (op1 == const0_rtx
20968 && (code == LT || code == GE))
20969 compare_code = code;
20970 else if (op1 == constm1_rtx)
20972 if (code == LE)
20973 compare_code = LT;
20974 else if (code == GT)
20975 compare_code = GE;
20979 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20980 if (compare_code != UNKNOWN
20981 && GET_MODE (op0) == GET_MODE (out)
20982 && (cf == -1 || ct == -1))
20984 /* If lea code below could be used, only optimize
20985 if it results in a 2 insn sequence. */
20987 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20988 || diff == 3 || diff == 5 || diff == 9)
20989 || (compare_code == LT && ct == -1)
20990 || (compare_code == GE && cf == -1))
20993 * notl op1 (if necessary)
20994 * sarl $31, op1
20995 * orl cf, op1
20997 if (ct != -1)
20999 cf = ct;
21000 ct = -1;
21001 code = reverse_condition (code);
21004 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21006 out = expand_simple_binop (mode, IOR,
21007 out, GEN_INT (cf),
21008 out, 1, OPTAB_DIRECT);
21009 if (out != operands[0])
21010 emit_move_insn (operands[0], out);
21012 return true;
21017 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21018 || diff == 3 || diff == 5 || diff == 9)
21019 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21020 && (mode != DImode
21021 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21024 * xorl dest,dest
21025 * cmpl op1,op2
21026 * setcc dest
21027 * lea cf(dest*(ct-cf)),dest
21029 * Size 14.
21031 * This also catches the degenerate setcc-only case.
21034 rtx tmp;
21035 int nops;
21037 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
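/* OUT now holds 0 or 1; the lea built below multiplies it by
   diff = ct - cf (a plain copy when diff is 1) and adds cf back in,
   so the final value is cf or ct without a branch.  */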
21039 nops = 0;
21040 /* On x86_64 the lea instruction operates on Pmode, so we need
21041 to do the arithmetic in the proper mode to match. */
21042 if (diff == 1)
21043 tmp = copy_rtx (out);
21044 else
21046 rtx out1;
21047 out1 = copy_rtx (out);
21048 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21049 nops++;
21050 if (diff & 1)
21052 tmp = gen_rtx_PLUS (mode, tmp, out1);
21053 nops++;
21056 if (cf != 0)
21058 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21059 nops++;
21061 if (!rtx_equal_p (tmp, out))
21063 if (nops == 1)
21064 out = force_operand (tmp, copy_rtx (out));
21065 else
21066 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21068 if (!rtx_equal_p (out, operands[0]))
21069 emit_move_insn (operands[0], copy_rtx (out));
21071 return true;
21075 * General case: Jumpful:
21076 * xorl dest,dest cmpl op1, op2
21077 * cmpl op1, op2 movl ct, dest
21078 * setcc dest jcc 1f
21079 * decl dest movl cf, dest
21080 * andl (cf-ct),dest 1:
21081 * addl ct,dest
21083 * Size 20. Size 14.
21085 * This is reasonably steep, but branch mispredict costs are
21086 * high on modern cpus, so consider failing only if optimizing
21087 * for space.
21090 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21091 && BRANCH_COST (optimize_insn_for_speed_p (),
21092 false) >= 2)
21094 if (cf == 0)
21096 machine_mode cmp_mode = GET_MODE (op0);
21097 enum rtx_code new_code;
21099 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21101 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21103 /* We may be reversing an unordered compare to a normal compare,
21104 which is not valid in general (we may convert a non-trapping
21105 condition to a trapping one); however, on i386 we currently
21106 emit all comparisons unordered. */
21107 new_code = reverse_condition_maybe_unordered (code);
21109 else
21111 new_code = ix86_reverse_condition (code, cmp_mode);
21112 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21113 compare_code = reverse_condition (compare_code);
21116 if (new_code != UNKNOWN)
21118 cf = ct;
21119 ct = 0;
21120 code = new_code;
21124 if (compare_code != UNKNOWN)
21126 /* notl op1 (if needed)
21127 sarl $31, op1
21128 andl (cf-ct), op1
21129 addl ct, op1
21131 For x < 0 (resp. x <= -1) there will be no notl,
21132 so if possible swap the constants to get rid of the
21133 complement.
21134 True/false will be -1/0 while code below (store flag
21135 followed by decrement) is 0/-1, so the constants need
21136 to be exchanged once more. */
21138 if (compare_code == GE || !cf)
21140 code = reverse_condition (code);
21141 compare_code = LT;
21143 else
21144 std::swap (ct, cf);
21146 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21148 else
21150 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21152 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21153 constm1_rtx,
21154 copy_rtx (out), 1, OPTAB_DIRECT);
21157 out = expand_simple_binop (mode, AND, copy_rtx (out),
21158 gen_int_mode (cf - ct, mode),
21159 copy_rtx (out), 1, OPTAB_DIRECT);
21160 if (ct)
21161 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21162 copy_rtx (out), 1, OPTAB_DIRECT);
21163 if (!rtx_equal_p (out, operands[0]))
21164 emit_move_insn (operands[0], copy_rtx (out));
21166 return true;
21170 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21172 /* Try a few things more with specific constants and a variable. */
21174 optab op;
21175 rtx var, orig_out, out, tmp;
21177 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21178 return false;
21180 /* If one of the two operands is an interesting constant, load a
21181 constant with the above and mask it in with a logical operation. */
21183 if (CONST_INT_P (operands[2]))
21185 var = operands[3];
21186 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21187 operands[3] = constm1_rtx, op = and_optab;
21188 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21189 operands[3] = const0_rtx, op = ior_optab;
21190 else
21191 return false;
21193 else if (CONST_INT_P (operands[3]))
21195 var = operands[2];
21196 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21197 operands[2] = constm1_rtx, op = and_optab;
21198 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21199 operands[2] = const0_rtx, op = ior_optab;
21200 else
21201 return false;
21203 else
21204 return false;
21206 orig_out = operands[0];
21207 tmp = gen_reg_rtx (mode);
21208 operands[0] = tmp;
21210 /* Recurse to get the constant loaded. */
21211 if (ix86_expand_int_movcc (operands) == 0)
21212 return false;
21214 /* Mask in the interesting variable. */
21215 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21216 OPTAB_WIDEN);
21217 if (!rtx_equal_p (out, orig_out))
21218 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21220 return true;
21224 * For comparison with above,
21226 * movl cf,dest
21227 * movl ct,tmp
21228 * cmpl op1,op2
21229 * cmovcc tmp,dest
21231 * Size 15.
21234 if (! nonimmediate_operand (operands[2], mode))
21235 operands[2] = force_reg (mode, operands[2]);
21236 if (! nonimmediate_operand (operands[3], mode))
21237 operands[3] = force_reg (mode, operands[3]);
21239 if (! register_operand (operands[2], VOIDmode)
21240 && (mode == QImode
21241 || ! register_operand (operands[3], VOIDmode)))
21242 operands[2] = force_reg (mode, operands[2]);
21244 if (mode == QImode
21245 && ! register_operand (operands[3], VOIDmode))
21246 operands[3] = force_reg (mode, operands[3]);
21248 emit_insn (compare_seq);
21249 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21250 gen_rtx_IF_THEN_ELSE (mode,
21251 compare_op, operands[2],
21252 operands[3])));
21253 return true;
21256 /* Swap, force into registers, or otherwise massage the two operands
21257 to an sse comparison with a mask result. Thus we differ a bit from
21258 ix86_prepare_fp_compare_args which expects to produce a flags result.
21260 The DEST operand exists to help determine whether to commute commutative
21261 operators. The POP0/POP1 operands are updated in place. The new
21262 comparison code is returned, or UNKNOWN if not implementable. */
21264 static enum rtx_code
21265 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21266 rtx *pop0, rtx *pop1)
21268 switch (code)
21270 case LTGT:
21271 case UNEQ:
21272 /* AVX supports all the needed comparisons. */
21273 if (TARGET_AVX)
21274 break;
21275 /* We have no LTGT as an operator. We could implement it with
21276 NE & ORDERED, but this requires an extra temporary. It's
21277 not clear that it's worth it. */
21278 return UNKNOWN;
21280 case LT:
21281 case LE:
21282 case UNGT:
21283 case UNGE:
21284 /* These are supported directly. */
21285 break;
21287 case EQ:
21288 case NE:
21289 case UNORDERED:
21290 case ORDERED:
21291 /* AVX has 3 operand comparisons, no need to swap anything. */
21292 if (TARGET_AVX)
21293 break;
21294 /* For commutative operators, try to canonicalize the destination
21295 operand to be first in the comparison - this helps reload to
21296 avoid extra moves. */
21297 if (!dest || !rtx_equal_p (dest, *pop1))
21298 break;
21299 /* FALLTHRU */
21301 case GE:
21302 case GT:
21303 case UNLE:
21304 case UNLT:
21305 /* These are not supported directly before AVX, and furthermore
21306 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21307 comparison operands to transform into something that is
21308 supported. */
21309 std::swap (*pop0, *pop1);
21310 code = swap_condition (code);
21311 break;
21313 default:
21314 gcc_unreachable ();
21317 return code;
21320 /* Detect conditional moves that exactly match min/max operational
21321 semantics. Note that this is IEEE safe, as long as we don't
21322 interchange the operands.
21324 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21325 and TRUE if the operation is successful and instructions are emitted. */
21327 static bool
21328 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21329 rtx cmp_op1, rtx if_true, rtx if_false)
21331 machine_mode mode;
21332 bool is_min;
21333 rtx tmp;
21335 if (code == LT)
21337 else if (code == UNGE)
21338 std::swap (if_true, if_false);
21339 else
21340 return false;
21342 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21343 is_min = true;
21344 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21345 is_min = false;
21346 else
21347 return false;
21349 mode = GET_MODE (dest);
21351 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21352 but MODE may be a vector mode and thus not appropriate. */
21353 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21355 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21356 rtvec v;
21358 if_true = force_reg (mode, if_true);
21359 v = gen_rtvec (2, if_true, if_false);
21360 tmp = gen_rtx_UNSPEC (mode, v, u);
21362 else
21364 code = is_min ? SMIN : SMAX;
21365 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21368 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21369 return true;
21372 /* Expand an sse vector comparison. Return the register with the result. */
21374 static rtx
21375 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21376 rtx op_true, rtx op_false)
21378 machine_mode mode = GET_MODE (dest);
21379 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21381 /* In the general case the result of the comparison can differ from the operands' type. */
21382 machine_mode cmp_mode;
21384 /* In AVX512F the result of comparison is an integer mask. */
21385 bool maskcmp = false;
21386 rtx x;
21388 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
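/* A 512-bit comparison puts its result in a mask register, one bit per
   element, so the result mode is the integer mode with
   GET_MODE_NUNITS (cmp_ops_mode) bits rather than a vector mode.  */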
21390 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21391 gcc_assert (cmp_mode != BLKmode);
21393 maskcmp = true;
21395 else
21396 cmp_mode = cmp_ops_mode;
21399 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21400 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21401 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21403 if (optimize
21404 || reg_overlap_mentioned_p (dest, op_true)
21405 || reg_overlap_mentioned_p (dest, op_false))
21406 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21408 /* Compare patterns for int modes are unspec in AVX512F only. */
21409 if (maskcmp && (code == GT || code == EQ))
21411 rtx (*gen)(rtx, rtx, rtx);
21413 switch (cmp_ops_mode)
21415 case V64QImode:
21416 gcc_assert (TARGET_AVX512BW);
21417 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21418 break;
21419 case V32HImode:
21420 gcc_assert (TARGET_AVX512BW);
21421 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21422 break;
21423 case V16SImode:
21424 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21425 break;
21426 case V8DImode:
21427 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21428 break;
21429 default:
21430 gen = NULL;
21433 if (gen)
21435 emit_insn (gen (dest, cmp_op0, cmp_op1));
21436 return dest;
21439 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21441 if (cmp_mode != mode && !maskcmp)
21443 x = force_reg (cmp_ops_mode, x);
21444 convert_move (dest, x, false);
21446 else
21447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21449 return dest;
21452 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21453 operations. This is used for both scalar and vector conditional moves. */
21455 static void
21456 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21458 machine_mode mode = GET_MODE (dest);
21459 machine_mode cmpmode = GET_MODE (cmp);
21461 /* In AVX512F the result of comparison is an integer mask. */
21462 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21464 rtx t2, t3, x;
21466 if (vector_all_ones_operand (op_true, mode)
21467 && rtx_equal_p (op_false, CONST0_RTX (mode))
21468 && !maskcmp)
21470 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21472 else if (op_false == CONST0_RTX (mode)
21473 && !maskcmp)
21475 op_true = force_reg (mode, op_true);
21476 x = gen_rtx_AND (mode, cmp, op_true);
21477 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21479 else if (op_true == CONST0_RTX (mode)
21480 && !maskcmp)
21482 op_false = force_reg (mode, op_false);
21483 x = gen_rtx_NOT (mode, cmp);
21484 x = gen_rtx_AND (mode, x, op_false);
21485 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21487 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21488 && !maskcmp)
21490 op_false = force_reg (mode, op_false);
21491 x = gen_rtx_IOR (mode, cmp, op_false);
21492 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21494 else if (TARGET_XOP
21495 && !maskcmp)
21497 op_true = force_reg (mode, op_true);
21499 if (!nonimmediate_operand (op_false, mode))
21500 op_false = force_reg (mode, op_false);
21502 emit_insn (gen_rtx_SET (mode, dest,
21503 gen_rtx_IF_THEN_ELSE (mode, cmp,
21504 op_true,
21505 op_false)));
21507 else
21509 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21510 rtx d = dest;
21512 if (!nonimmediate_operand (op_true, mode))
21513 op_true = force_reg (mode, op_true);
21515 op_false = force_reg (mode, op_false);
21517 switch (mode)
21519 case V4SFmode:
21520 if (TARGET_SSE4_1)
21521 gen = gen_sse4_1_blendvps;
21522 break;
21523 case V2DFmode:
21524 if (TARGET_SSE4_1)
21525 gen = gen_sse4_1_blendvpd;
21526 break;
21527 case V16QImode:
21528 case V8HImode:
21529 case V4SImode:
21530 case V2DImode:
21531 if (TARGET_SSE4_1)
21533 gen = gen_sse4_1_pblendvb;
21534 if (mode != V16QImode)
21535 d = gen_reg_rtx (V16QImode);
21536 op_false = gen_lowpart (V16QImode, op_false);
21537 op_true = gen_lowpart (V16QImode, op_true);
21538 cmp = gen_lowpart (V16QImode, cmp);
21540 break;
21541 case V8SFmode:
21542 if (TARGET_AVX)
21543 gen = gen_avx_blendvps256;
21544 break;
21545 case V4DFmode:
21546 if (TARGET_AVX)
21547 gen = gen_avx_blendvpd256;
21548 break;
21549 case V32QImode:
21550 case V16HImode:
21551 case V8SImode:
21552 case V4DImode:
21553 if (TARGET_AVX2)
21555 gen = gen_avx2_pblendvb;
21556 if (mode != V32QImode)
21557 d = gen_reg_rtx (V32QImode);
21558 op_false = gen_lowpart (V32QImode, op_false);
21559 op_true = gen_lowpart (V32QImode, op_true);
21560 cmp = gen_lowpart (V32QImode, cmp);
21562 break;
21564 case V64QImode:
21565 gen = gen_avx512bw_blendmv64qi;
21566 break;
21567 case V32HImode:
21568 gen = gen_avx512bw_blendmv32hi;
21569 break;
21570 case V16SImode:
21571 gen = gen_avx512f_blendmv16si;
21572 break;
21573 case V8DImode:
21574 gen = gen_avx512f_blendmv8di;
21575 break;
21576 case V8DFmode:
21577 gen = gen_avx512f_blendmv8df;
21578 break;
21579 case V16SFmode:
21580 gen = gen_avx512f_blendmv16sf;
21581 break;
21583 default:
21584 break;
21587 if (gen != NULL)
21589 emit_insn (gen (d, op_false, op_true, cmp));
21590 if (d != dest)
21591 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21593 else
21595 op_true = force_reg (mode, op_true);
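/* No blend instruction is available for this mode, so compute
   dest = (op_true & cmp) | (~cmp & op_false) with three logical
   operations.  */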
21597 t2 = gen_reg_rtx (mode);
21598 if (optimize)
21599 t3 = gen_reg_rtx (mode);
21600 else
21601 t3 = dest;
21603 x = gen_rtx_AND (mode, op_true, cmp);
21604 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21606 x = gen_rtx_NOT (mode, cmp);
21607 x = gen_rtx_AND (mode, x, op_false);
21608 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21610 x = gen_rtx_IOR (mode, t3, t2);
21611 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21616 /* Expand a floating-point conditional move. Return true if successful. */
21618 bool
21619 ix86_expand_fp_movcc (rtx operands[])
21621 machine_mode mode = GET_MODE (operands[0]);
21622 enum rtx_code code = GET_CODE (operands[1]);
21623 rtx tmp, compare_op;
21624 rtx op0 = XEXP (operands[1], 0);
21625 rtx op1 = XEXP (operands[1], 1);
21627 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21629 machine_mode cmode;
21631 /* Since we've no cmove for sse registers, don't force bad register
21632 allocation just to gain access to it. Deny movcc when the
21633 comparison mode doesn't match the move mode. */
21634 cmode = GET_MODE (op0);
21635 if (cmode == VOIDmode)
21636 cmode = GET_MODE (op1);
21637 if (cmode != mode)
21638 return false;
21640 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21641 if (code == UNKNOWN)
21642 return false;
21644 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21645 operands[2], operands[3]))
21646 return true;
21648 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21649 operands[2], operands[3]);
21650 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21651 return true;
21654 if (GET_MODE (op0) == TImode
21655 || (GET_MODE (op0) == DImode
21656 && !TARGET_64BIT))
21657 return false;
21659 /* The floating point conditional move instructions don't directly
21660 support conditions resulting from a signed integer comparison. */
21662 compare_op = ix86_expand_compare (code, op0, op1);
21663 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21665 tmp = gen_reg_rtx (QImode);
21666 ix86_expand_setcc (tmp, code, op0, op1);
21668 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21671 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21672 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21673 operands[2], operands[3])));
21675 return true;
21678 /* Expand a floating-point vector conditional move; a vcond operation
21679 rather than a movcc operation. */
21681 bool
21682 ix86_expand_fp_vcond (rtx operands[])
21684 enum rtx_code code = GET_CODE (operands[3]);
21685 rtx cmp;
21687 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21688 &operands[4], &operands[5]);
21689 if (code == UNKNOWN)
21691 rtx temp;
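/* LTGT and UNEQ have no direct encoding here; build their masks as
   ORDERED & NE and UNORDERED | EQ respectively, then feed the combined
   mask to the blend below.  */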
21692 switch (GET_CODE (operands[3]))
21694 case LTGT:
21695 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21696 operands[5], operands[0], operands[0]);
21697 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21698 operands[5], operands[1], operands[2]);
21699 code = AND;
21700 break;
21701 case UNEQ:
21702 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21703 operands[5], operands[0], operands[0]);
21704 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21705 operands[5], operands[1], operands[2]);
21706 code = IOR;
21707 break;
21708 default:
21709 gcc_unreachable ();
21711 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21712 OPTAB_DIRECT);
21713 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21714 return true;
21717 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21718 operands[5], operands[1], operands[2]))
21719 return true;
21721 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21722 operands[1], operands[2]);
21723 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21724 return true;
21727 /* Expand a signed/unsigned integral vector conditional move. */
21729 bool
21730 ix86_expand_int_vcond (rtx operands[])
21732 machine_mode data_mode = GET_MODE (operands[0]);
21733 machine_mode mode = GET_MODE (operands[4]);
21734 enum rtx_code code = GET_CODE (operands[3]);
21735 bool negate = false;
21736 rtx x, cop0, cop1;
21738 cop0 = operands[4];
21739 cop1 = operands[5];
21741 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21742 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21743 if ((code == LT || code == GE)
21744 && data_mode == mode
21745 && cop1 == CONST0_RTX (mode)
21746 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21747 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21748 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21749 && (GET_MODE_SIZE (data_mode) == 16
21750 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21752 rtx negop = operands[2 - (code == LT)];
21753 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21754 if (negop == CONST1_RTX (data_mode))
21756 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21757 operands[0], 1, OPTAB_DIRECT);
21758 if (res != operands[0])
21759 emit_move_insn (operands[0], res);
21760 return true;
21762 else if (GET_MODE_INNER (data_mode) != DImode
21763 && vector_all_ones_operand (negop, data_mode))
21765 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21766 operands[0], 0, OPTAB_DIRECT);
21767 if (res != operands[0])
21768 emit_move_insn (operands[0], res);
21769 return true;
21773 if (!nonimmediate_operand (cop1, mode))
21774 cop1 = force_reg (mode, cop1);
21775 if (!general_operand (operands[1], data_mode))
21776 operands[1] = force_reg (data_mode, operands[1]);
21777 if (!general_operand (operands[2], data_mode))
21778 operands[2] = force_reg (data_mode, operands[2]);
21780 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21781 if (TARGET_XOP
21782 && (mode == V16QImode || mode == V8HImode
21783 || mode == V4SImode || mode == V2DImode))
21785 else
21787 /* Canonicalize the comparison to EQ, GT, GTU. */
21788 switch (code)
21790 case EQ:
21791 case GT:
21792 case GTU:
21793 break;
21795 case NE:
21796 case LE:
21797 case LEU:
21798 code = reverse_condition (code);
21799 negate = true;
21800 break;
21802 case GE:
21803 case GEU:
21804 code = reverse_condition (code);
21805 negate = true;
21806 /* FALLTHRU */
21808 case LT:
21809 case LTU:
21810 std::swap (cop0, cop1);
21811 code = swap_condition (code);
21812 break;
21814 default:
21815 gcc_unreachable ();
21818 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21819 if (mode == V2DImode)
21821 switch (code)
21823 case EQ:
21824 /* SSE4.1 supports EQ. */
21825 if (!TARGET_SSE4_1)
21826 return false;
21827 break;
21829 case GT:
21830 case GTU:
21831 /* SSE4.2 supports GT/GTU. */
21832 if (!TARGET_SSE4_2)
21833 return false;
21834 break;
21836 default:
21837 gcc_unreachable ();
21841 /* Unsigned parallel compare is not supported by the hardware.
21842 Play some tricks to turn this into a signed comparison
21843 against 0. */
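/* For the dword/qword element sizes, bias both operands by the minimum
   signed value, which preserves the unsigned order under a signed compare
   (a >u b iff a - 0x80... >s b - 0x80...).  For the byte/word element
   sizes, use a saturating subtraction instead: a >u b exactly when
   a -us b is nonzero, so compare that difference against zero and invert
   the sense via NEGATE.  */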
21844 if (code == GTU)
21846 cop0 = force_reg (mode, cop0);
21848 switch (mode)
21850 case V16SImode:
21851 case V8DImode:
21852 case V8SImode:
21853 case V4DImode:
21854 case V4SImode:
21855 case V2DImode:
21857 rtx t1, t2, mask;
21858 rtx (*gen_sub3) (rtx, rtx, rtx);
21860 switch (mode)
21862 case V16SImode: gen_sub3 = gen_subv16si3; break;
21863 case V8DImode: gen_sub3 = gen_subv8di3; break;
21864 case V8SImode: gen_sub3 = gen_subv8si3; break;
21865 case V4DImode: gen_sub3 = gen_subv4di3; break;
21866 case V4SImode: gen_sub3 = gen_subv4si3; break;
21867 case V2DImode: gen_sub3 = gen_subv2di3; break;
21868 default:
21869 gcc_unreachable ();
21871 /* Subtract (-(INT MAX) - 1) from both operands to make
21872 them signed. */
21873 mask = ix86_build_signbit_mask (mode, true, false);
21874 t1 = gen_reg_rtx (mode);
21875 emit_insn (gen_sub3 (t1, cop0, mask));
21877 t2 = gen_reg_rtx (mode);
21878 emit_insn (gen_sub3 (t2, cop1, mask));
21880 cop0 = t1;
21881 cop1 = t2;
21882 code = GT;
21884 break;
21886 case V64QImode:
21887 case V32HImode:
21888 case V32QImode:
21889 case V16HImode:
21890 case V16QImode:
21891 case V8HImode:
21892 /* Perform a parallel unsigned saturating subtraction. */
21893 x = gen_reg_rtx (mode);
21894 emit_insn (gen_rtx_SET (VOIDmode, x,
21895 gen_rtx_US_MINUS (mode, cop0, cop1)));
21897 cop0 = x;
21898 cop1 = CONST0_RTX (mode);
21899 code = EQ;
21900 negate = !negate;
21901 break;
21903 default:
21904 gcc_unreachable ();
21909 /* Allow the comparison to be done in one mode, but the movcc to
21910 happen in another mode. */
21911 if (data_mode == mode)
21913 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21914 operands[1+negate], operands[2-negate]);
21916 else
21918 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21919 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21920 operands[1+negate], operands[2-negate]);
21921 if (GET_MODE (x) == mode)
21922 x = gen_lowpart (data_mode, x);
21925 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21926 operands[2-negate]);
21927 return true;
21930 /* AVX512F does support 64-byte integer vector operations,
21931 thus the longest vector we are faced with is V64QImode. */
21932 #define MAX_VECT_LEN 64
21934 struct expand_vec_perm_d
21936 rtx target, op0, op1;
21937 unsigned char perm[MAX_VECT_LEN];
21938 machine_mode vmode;
21939 unsigned char nelt;
21940 bool one_operand_p;
21941 bool testing_p;
21944 static bool
21945 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21946 struct expand_vec_perm_d *d)
21948 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21949 expanders, so the arguments are either in d, or in op0, op1 etc. */
21950 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21951 machine_mode maskmode = mode;
21952 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21954 switch (mode)
21956 case V8HImode:
21957 if (TARGET_AVX512VL && TARGET_AVX512BW)
21958 gen = gen_avx512vl_vpermi2varv8hi3;
21959 break;
21960 case V16HImode:
21961 if (TARGET_AVX512VL && TARGET_AVX512BW)
21962 gen = gen_avx512vl_vpermi2varv16hi3;
21963 break;
21964 case V64QImode:
21965 if (TARGET_AVX512VBMI)
21966 gen = gen_avx512bw_vpermi2varv64qi3;
21967 break;
21968 case V32HImode:
21969 if (TARGET_AVX512BW)
21970 gen = gen_avx512bw_vpermi2varv32hi3;
21971 break;
21972 case V4SImode:
21973 if (TARGET_AVX512VL)
21974 gen = gen_avx512vl_vpermi2varv4si3;
21975 break;
21976 case V8SImode:
21977 if (TARGET_AVX512VL)
21978 gen = gen_avx512vl_vpermi2varv8si3;
21979 break;
21980 case V16SImode:
21981 if (TARGET_AVX512F)
21982 gen = gen_avx512f_vpermi2varv16si3;
21983 break;
21984 case V4SFmode:
21985 if (TARGET_AVX512VL)
21987 gen = gen_avx512vl_vpermi2varv4sf3;
21988 maskmode = V4SImode;
21990 break;
21991 case V8SFmode:
21992 if (TARGET_AVX512VL)
21994 gen = gen_avx512vl_vpermi2varv8sf3;
21995 maskmode = V8SImode;
21997 break;
21998 case V16SFmode:
21999 if (TARGET_AVX512F)
22001 gen = gen_avx512f_vpermi2varv16sf3;
22002 maskmode = V16SImode;
22004 break;
22005 case V2DImode:
22006 if (TARGET_AVX512VL)
22007 gen = gen_avx512vl_vpermi2varv2di3;
22008 break;
22009 case V4DImode:
22010 if (TARGET_AVX512VL)
22011 gen = gen_avx512vl_vpermi2varv4di3;
22012 break;
22013 case V8DImode:
22014 if (TARGET_AVX512F)
22015 gen = gen_avx512f_vpermi2varv8di3;
22016 break;
22017 case V2DFmode:
22018 if (TARGET_AVX512VL)
22020 gen = gen_avx512vl_vpermi2varv2df3;
22021 maskmode = V2DImode;
22023 break;
22024 case V4DFmode:
22025 if (TARGET_AVX512VL)
22027 gen = gen_avx512vl_vpermi2varv4df3;
22028 maskmode = V4DImode;
22030 break;
22031 case V8DFmode:
22032 if (TARGET_AVX512F)
22034 gen = gen_avx512f_vpermi2varv8df3;
22035 maskmode = V8DImode;
22037 break;
22038 default:
22039 break;
22042 if (gen == NULL)
22043 return false;
22045 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
22046 expanders, so the arguments are either in d, or in op0, op1 etc. */
22047 if (d)
22049 rtx vec[64];
22050 target = d->target;
22051 op0 = d->op0;
22052 op1 = d->op1;
22053 for (int i = 0; i < d->nelt; ++i)
22054 vec[i] = GEN_INT (d->perm[i]);
22055 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22058 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22059 return true;
22062 /* Expand a variable vector permutation. */
22064 void
22065 ix86_expand_vec_perm (rtx operands[])
22067 rtx target = operands[0];
22068 rtx op0 = operands[1];
22069 rtx op1 = operands[2];
22070 rtx mask = operands[3];
22071 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22072 machine_mode mode = GET_MODE (op0);
22073 machine_mode maskmode = GET_MODE (mask);
22074 int w, e, i;
22075 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22077 /* Number of elements in the vector. */
22078 w = GET_MODE_NUNITS (mode);
22079 e = GET_MODE_UNIT_SIZE (mode);
22080 gcc_assert (w <= 64);
22082 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22083 return;
22085 if (TARGET_AVX2)
22087 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22089 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22090 a constant shuffle operand. With a tiny bit of effort we can
22091 use VPERMD instead. A re-interpretation stall for V4DFmode is
22092 unfortunate but there's no avoiding it.
22093 Similarly for V16HImode we don't have instructions for variable
22094 shuffling, while for V32QImode we can, after preparing suitable
22095 masks, use vpshufb; vpshufb; vpermq; vpor. */
22097 if (mode == V16HImode)
22099 maskmode = mode = V32QImode;
22100 w = 32;
22101 e = 1;
22103 else
22105 maskmode = mode = V8SImode;
22106 w = 8;
22107 e = 4;
22109 t1 = gen_reg_rtx (maskmode);
22111 /* Replicate the low bits of the V4DImode mask into V8SImode:
22112 mask = { A B C D }
22113 t1 = { A A B B C C D D }. */
22114 for (i = 0; i < w / 2; ++i)
22115 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22116 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22117 vt = force_reg (maskmode, vt);
22118 mask = gen_lowpart (maskmode, mask);
22119 if (maskmode == V8SImode)
22120 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22121 else
22122 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22124 /* Multiply the shuffle indices by two. */
22125 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22126 OPTAB_DIRECT);
22128 /* Add one to the odd shuffle indices:
22129 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22130 for (i = 0; i < w / 2; ++i)
22132 vec[i * 2] = const0_rtx;
22133 vec[i * 2 + 1] = const1_rtx;
22135 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22136 vt = validize_mem (force_const_mem (maskmode, vt));
22137 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22138 OPTAB_DIRECT);
22140 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22141 operands[3] = mask = t1;
22142 target = gen_reg_rtx (mode);
22143 op0 = gen_lowpart (mode, op0);
22144 op1 = gen_lowpart (mode, op1);
22147 switch (mode)
22149 case V8SImode:
22150 /* The VPERMD and VPERMPS instructions already properly ignore
22151 the high bits of the shuffle elements. No need for us to
22152 perform an AND ourselves. */
22153 if (one_operand_shuffle)
22155 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22156 if (target != operands[0])
22157 emit_move_insn (operands[0],
22158 gen_lowpart (GET_MODE (operands[0]), target));
22160 else
22162 t1 = gen_reg_rtx (V8SImode);
22163 t2 = gen_reg_rtx (V8SImode);
22164 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22165 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22166 goto merge_two;
22168 return;
22170 case V8SFmode:
22171 mask = gen_lowpart (V8SImode, mask);
22172 if (one_operand_shuffle)
22173 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22174 else
22176 t1 = gen_reg_rtx (V8SFmode);
22177 t2 = gen_reg_rtx (V8SFmode);
22178 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22179 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22180 goto merge_two;
22182 return;
22184 case V4SImode:
22185 /* By combining the two 128-bit input vectors into one 256-bit
22186 input vector, we can use VPERMD and VPERMPS for the full
22187 two-operand shuffle. */
22188 t1 = gen_reg_rtx (V8SImode);
22189 t2 = gen_reg_rtx (V8SImode);
22190 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22191 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22192 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22193 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22194 return;
22196 case V4SFmode:
22197 t1 = gen_reg_rtx (V8SFmode);
22198 t2 = gen_reg_rtx (V8SImode);
22199 mask = gen_lowpart (V4SImode, mask);
22200 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22201 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22202 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22203 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22204 return;
22206 case V32QImode:
22207 t1 = gen_reg_rtx (V32QImode);
22208 t2 = gen_reg_rtx (V32QImode);
22209 t3 = gen_reg_rtx (V32QImode);
22210 vt2 = GEN_INT (-128);
22211 for (i = 0; i < 32; i++)
22212 vec[i] = vt2;
22213 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22214 vt = force_reg (V32QImode, vt);
22215 for (i = 0; i < 32; i++)
22216 vec[i] = i < 16 ? vt2 : const0_rtx;
22217 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22218 vt2 = force_reg (V32QImode, vt2);
22219 /* From mask create two adjusted masks, which contain the same
22220 bits as mask in the low 7 bits of each vector element.
22221 The first mask will have the most significant bit clear
22222 if it requests element from the same 128-bit lane
22223 and MSB set if it requests element from the other 128-bit lane.
22224 The second mask will have the opposite values of the MSB,
22225 and additionally will have its 128-bit lanes swapped.
22226 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22227 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22228 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22229 stands for other 12 bytes. */
22230 /* The bit that says whether an element is from the same lane or the other
22231 lane is bit 4, so shift it up by 3 to the MSB position. */
22232 t5 = gen_reg_rtx (V4DImode);
22233 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22234 GEN_INT (3)));
22235 /* Clear MSB bits from the mask just in case it had them set. */
22236 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22237 /* After this t1 will have MSB set for elements from other lane. */
22238 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22239 /* Clear bits other than MSB. */
22240 emit_insn (gen_andv32qi3 (t1, t1, vt));
22241 /* Or in the lower bits from mask into t3. */
22242 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22243 /* And invert MSB bits in t1, so MSB is set for elements from the same
22244 lane. */
22245 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22246 /* Swap 128-bit lanes in t3. */
22247 t6 = gen_reg_rtx (V4DImode);
22248 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22249 const2_rtx, GEN_INT (3),
22250 const0_rtx, const1_rtx));
22251 /* And or in the lower bits from mask into t1. */
22252 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22253 if (one_operand_shuffle)
22255 /* Each of these shuffles will put 0s in places where
22256 element from the other 128-bit lane is needed, otherwise
22257 will shuffle in the requested value. */
22258 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22259 gen_lowpart (V32QImode, t6)));
22260 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22261 /* For t3 the 128-bit lanes are swapped again. */
22262 t7 = gen_reg_rtx (V4DImode);
22263 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22264 const2_rtx, GEN_INT (3),
22265 const0_rtx, const1_rtx));
22266 /* And oring both together leads to the result. */
22267 emit_insn (gen_iorv32qi3 (target, t1,
22268 gen_lowpart (V32QImode, t7)));
22269 if (target != operands[0])
22270 emit_move_insn (operands[0],
22271 gen_lowpart (GET_MODE (operands[0]), target));
22272 return;
22275 t4 = gen_reg_rtx (V32QImode);
22276 /* Similar to the one_operand_shuffle code above,
22277 just repeated twice, once for each operand. The merge_two:
22278 code will merge the two results together. */
22279 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22280 gen_lowpart (V32QImode, t6)));
22281 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22282 gen_lowpart (V32QImode, t6)));
22283 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22284 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22285 t7 = gen_reg_rtx (V4DImode);
22286 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22287 const2_rtx, GEN_INT (3),
22288 const0_rtx, const1_rtx));
22289 t8 = gen_reg_rtx (V4DImode);
22290 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22291 const2_rtx, GEN_INT (3),
22292 const0_rtx, const1_rtx));
22293 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22294 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22295 t1 = t4;
22296 t2 = t3;
22297 goto merge_two;
22299 default:
22300 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22301 break;
22305 if (TARGET_XOP)
22307 /* The XOP VPPERM insn supports three inputs. By ignoring the
22308 one_operand_shuffle special case, we avoid creating another
22309 set of constant vectors in memory. */
22310 one_operand_shuffle = false;
22312 /* mask = mask & {2*w-1, ...} */
22313 vt = GEN_INT (2*w - 1);
22315 else
22317 /* mask = mask & {w-1, ...} */
22318 vt = GEN_INT (w - 1);
22321 for (i = 0; i < w; i++)
22322 vec[i] = vt;
22323 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22324 mask = expand_simple_binop (maskmode, AND, mask, vt,
22325 NULL_RTX, 0, OPTAB_DIRECT);
22327 /* For non-QImode operations, convert the word permutation control
22328 into a byte permutation control. */
22329 if (mode != V16QImode)
22331 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22332 GEN_INT (exact_log2 (e)),
22333 NULL_RTX, 0, OPTAB_DIRECT);
22335 /* Convert mask to vector of chars. */
22336 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22338 /* Replicate each of the input bytes into byte positions:
22339 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22340 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22341 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22342 for (i = 0; i < 16; ++i)
22343 vec[i] = GEN_INT (i/e * e);
22344 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22345 vt = validize_mem (force_const_mem (V16QImode, vt));
22346 if (TARGET_XOP)
22347 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22348 else
22349 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22351 /* Convert it into the byte positions by doing
22352 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22353 for (i = 0; i < 16; ++i)
22354 vec[i] = GEN_INT (i % e);
22355 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22356 vt = validize_mem (force_const_mem (V16QImode, vt));
22357 emit_insn (gen_addv16qi3 (mask, mask, vt));
22360 /* The actual shuffle operations all operate on V16QImode. */
22361 op0 = gen_lowpart (V16QImode, op0);
22362 op1 = gen_lowpart (V16QImode, op1);
22364 if (TARGET_XOP)
22366 if (GET_MODE (target) != V16QImode)
22367 target = gen_reg_rtx (V16QImode);
22368 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22369 if (target != operands[0])
22370 emit_move_insn (operands[0],
22371 gen_lowpart (GET_MODE (operands[0]), target));
22373 else if (one_operand_shuffle)
22375 if (GET_MODE (target) != V16QImode)
22376 target = gen_reg_rtx (V16QImode);
22377 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22378 if (target != operands[0])
22379 emit_move_insn (operands[0],
22380 gen_lowpart (GET_MODE (operands[0]), target));
22382 else
22384 rtx xops[6];
22385 bool ok;
22387 /* Shuffle the two input vectors independently. */
22388 t1 = gen_reg_rtx (V16QImode);
22389 t2 = gen_reg_rtx (V16QImode);
22390 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22391 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22393 merge_two:
22394 /* Then merge them together. The key is whether any given control
22395 element contained a bit set that indicates the second word. */
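/* An element of the original control vector refers to the second input
   exactly when its bit w is set; the integer vcond below tests
   (mask & w) == w and picks the corresponding element from t2, falling
   back to t1 otherwise.  */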
22396 mask = operands[3];
22397 vt = GEN_INT (w);
22398 if (maskmode == V2DImode && !TARGET_SSE4_1)
22400 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22401 more shuffle to convert the V2DI input mask into a V4SI
22402 input mask. At that point the masking that expand_int_vcond
22403 performs will work as desired. */
22404 rtx t3 = gen_reg_rtx (V4SImode);
22405 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22406 const0_rtx, const0_rtx,
22407 const2_rtx, const2_rtx));
22408 mask = t3;
22409 maskmode = V4SImode;
22410 e = w = 4;
22413 for (i = 0; i < w; i++)
22414 vec[i] = vt;
22415 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22416 vt = force_reg (maskmode, vt);
22417 mask = expand_simple_binop (maskmode, AND, mask, vt,
22418 NULL_RTX, 0, OPTAB_DIRECT);
22420 if (GET_MODE (target) != mode)
22421 target = gen_reg_rtx (mode);
22422 xops[0] = target;
22423 xops[1] = gen_lowpart (mode, t2);
22424 xops[2] = gen_lowpart (mode, t1);
22425 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22426 xops[4] = mask;
22427 xops[5] = vt;
22428 ok = ix86_expand_int_vcond (xops);
22429 gcc_assert (ok);
22430 if (target != operands[0])
22431 emit_move_insn (operands[0],
22432 gen_lowpart (GET_MODE (operands[0]), target));
22436 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22437 true if we should do zero extension, else sign extension. HIGH_P is
22438 true if we want the N/2 high elements, else the low elements. */
22440 void
22441 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22443 machine_mode imode = GET_MODE (src);
22444 rtx tmp;
22446 if (TARGET_SSE4_1)
22448 rtx (*unpack)(rtx, rtx);
22449 rtx (*extract)(rtx, rtx) = NULL;
22450 machine_mode halfmode = BLKmode;
22452 switch (imode)
22454 case V64QImode:
22455 if (unsigned_p)
22456 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22457 else
22458 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22459 halfmode = V32QImode;
22460 extract
22461 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22462 break;
22463 case V32QImode:
22464 if (unsigned_p)
22465 unpack = gen_avx2_zero_extendv16qiv16hi2;
22466 else
22467 unpack = gen_avx2_sign_extendv16qiv16hi2;
22468 halfmode = V16QImode;
22469 extract
22470 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22471 break;
22472 case V32HImode:
22473 if (unsigned_p)
22474 unpack = gen_avx512f_zero_extendv16hiv16si2;
22475 else
22476 unpack = gen_avx512f_sign_extendv16hiv16si2;
22477 halfmode = V16HImode;
22478 extract
22479 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22480 break;
22481 case V16HImode:
22482 if (unsigned_p)
22483 unpack = gen_avx2_zero_extendv8hiv8si2;
22484 else
22485 unpack = gen_avx2_sign_extendv8hiv8si2;
22486 halfmode = V8HImode;
22487 extract
22488 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22489 break;
22490 case V16SImode:
22491 if (unsigned_p)
22492 unpack = gen_avx512f_zero_extendv8siv8di2;
22493 else
22494 unpack = gen_avx512f_sign_extendv8siv8di2;
22495 halfmode = V8SImode;
22496 extract
22497 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22498 break;
22499 case V8SImode:
22500 if (unsigned_p)
22501 unpack = gen_avx2_zero_extendv4siv4di2;
22502 else
22503 unpack = gen_avx2_sign_extendv4siv4di2;
22504 halfmode = V4SImode;
22505 extract
22506 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22507 break;
22508 case V16QImode:
22509 if (unsigned_p)
22510 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22511 else
22512 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22513 break;
22514 case V8HImode:
22515 if (unsigned_p)
22516 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22517 else
22518 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22519 break;
22520 case V4SImode:
22521 if (unsigned_p)
22522 unpack = gen_sse4_1_zero_extendv2siv2di2;
22523 else
22524 unpack = gen_sse4_1_sign_extendv2siv2di2;
22525 break;
22526 default:
22527 gcc_unreachable ();
22530 if (GET_MODE_SIZE (imode) >= 32)
22532 tmp = gen_reg_rtx (halfmode);
22533 emit_insn (extract (tmp, src));
22535 else if (high_p)
22537 /* Shift higher 8 bytes to lower 8 bytes. */
22538 tmp = gen_reg_rtx (V1TImode);
22539 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22540 GEN_INT (64)));
22541 tmp = gen_lowpart (imode, tmp);
22543 else
22544 tmp = src;
22546 emit_insn (unpack (dest, tmp));
22548 else
22550 rtx (*unpack)(rtx, rtx, rtx);
22552 switch (imode)
22554 case V16QImode:
22555 if (high_p)
22556 unpack = gen_vec_interleave_highv16qi;
22557 else
22558 unpack = gen_vec_interleave_lowv16qi;
22559 break;
22560 case V8HImode:
22561 if (high_p)
22562 unpack = gen_vec_interleave_highv8hi;
22563 else
22564 unpack = gen_vec_interleave_lowv8hi;
22565 break;
22566 case V4SImode:
22567 if (high_p)
22568 unpack = gen_vec_interleave_highv4si;
22569 else
22570 unpack = gen_vec_interleave_lowv4si;
22571 break;
22572 default:
22573 gcc_unreachable ();
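/* Interleave each element with zero (zero extension) or with a copy of
   its sign bits taken from the 0 > SRC comparison (sign extension); the
   interleave-high/low choice above selects which half of the vector is
   widened.  */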
22576 if (unsigned_p)
22577 tmp = force_reg (imode, CONST0_RTX (imode));
22578 else
22579 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22580 src, pc_rtx, pc_rtx);
22582 rtx tmp2 = gen_reg_rtx (imode);
22583 emit_insn (unpack (tmp2, src, tmp));
22584 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22588 /* Expand conditional increment or decrement using adc/sbb instructions.
22589 The default case using setcc followed by the conditional move can be
22590 done by generic code. */
22591 bool
22592 ix86_expand_int_addcc (rtx operands[])
22594 enum rtx_code code = GET_CODE (operands[1]);
22595 rtx flags;
22596 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22597 rtx compare_op;
22598 rtx val = const0_rtx;
22599 bool fpcmp = false;
22600 machine_mode mode;
22601 rtx op0 = XEXP (operands[1], 0);
22602 rtx op1 = XEXP (operands[1], 1);
22604 if (operands[3] != const1_rtx
22605 && operands[3] != constm1_rtx)
22606 return false;
22607 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22608 return false;
22609 code = GET_CODE (compare_op);
22611 flags = XEXP (compare_op, 0);
22613 if (GET_MODE (flags) == CCFPmode
22614 || GET_MODE (flags) == CCFPUmode)
22616 fpcmp = true;
22617 code = ix86_fp_compare_code_to_integer (code);
22620 if (code != LTU)
22622 val = constm1_rtx;
22623 if (fpcmp)
22624 PUT_CODE (compare_op,
22625 reverse_condition_maybe_unordered
22626 (GET_CODE (compare_op)));
22627 else
22628 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22631 mode = GET_MODE (operands[0]);
22633 /* Construct either adc or sbb insn. */
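/* The adc/sbb patterns compute operands[0] = operands[2] + VAL plus or
   minus the carry; VAL and the sense of the comparison were adjusted
   above so that the net effect is operands[2] + operands[3] when the
   condition holds and operands[2] unchanged otherwise.  */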
22634 if ((code == LTU) == (operands[3] == constm1_rtx))
22636 switch (mode)
22638 case QImode:
22639 insn = gen_subqi3_carry;
22640 break;
22641 case HImode:
22642 insn = gen_subhi3_carry;
22643 break;
22644 case SImode:
22645 insn = gen_subsi3_carry;
22646 break;
22647 case DImode:
22648 insn = gen_subdi3_carry;
22649 break;
22650 default:
22651 gcc_unreachable ();
22654 else
22656 switch (mode)
22658 case QImode:
22659 insn = gen_addqi3_carry;
22660 break;
22661 case HImode:
22662 insn = gen_addhi3_carry;
22663 break;
22664 case SImode:
22665 insn = gen_addsi3_carry;
22666 break;
22667 case DImode:
22668 insn = gen_adddi3_carry;
22669 break;
22670 default:
22671 gcc_unreachable ();
22674 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22676 return true;
22680 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22681 but works for floating point parameters and non-offsettable memories.
22682 For pushes, it returns just stack offsets; the values will be saved
22683 in the right order. Maximally three parts are generated. */
22685 static int
22686 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22688 int size;
22690 if (!TARGET_64BIT)
22691 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22692 else
22693 size = (GET_MODE_SIZE (mode) + 4) / 8;
22695 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22696 gcc_assert (size >= 2 && size <= 4);
22698 /* Optimize constant pool references to immediates. This is used by fp
22699 moves that force all constants to memory to allow combining. */
22700 if (MEM_P (operand) && MEM_READONLY_P (operand))
22702 rtx tmp = maybe_get_pool_constant (operand);
22703 if (tmp)
22704 operand = tmp;
22707 if (MEM_P (operand) && !offsettable_memref_p (operand))
22709 /* The only non-offsettable memories we handle are pushes. */
22710 int ok = push_operand (operand, VOIDmode);
22712 gcc_assert (ok);
22714 operand = copy_rtx (operand);
22715 PUT_MODE (operand, word_mode);
22716 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22717 return size;
22720 if (GET_CODE (operand) == CONST_VECTOR)
22722 machine_mode imode = int_mode_for_mode (mode);
22723 /* Caution: if we looked through a constant pool memory above,
22724 the operand may actually have a different mode now. That's
22725 ok, since we want to pun this all the way back to an integer. */
22726 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22727 gcc_assert (operand != NULL);
22728 mode = imode;
22731 if (!TARGET_64BIT)
22733 if (mode == DImode)
22734 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22735 else
22737 int i;
22739 if (REG_P (operand))
22741 gcc_assert (reload_completed);
22742 for (i = 0; i < size; i++)
22743 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22745 else if (offsettable_memref_p (operand))
22747 operand = adjust_address (operand, SImode, 0);
22748 parts[0] = operand;
22749 for (i = 1; i < size; i++)
22750 parts[i] = adjust_address (operand, SImode, 4 * i);
22752 else if (GET_CODE (operand) == CONST_DOUBLE)
22754 REAL_VALUE_TYPE r;
22755 long l[4];
22757 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22758 switch (mode)
22760 case TFmode:
22761 real_to_target (l, &r, mode);
22762 parts[3] = gen_int_mode (l[3], SImode);
22763 parts[2] = gen_int_mode (l[2], SImode);
22764 break;
22765 case XFmode:
22766 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22767 long double may not be 80-bit. */
22768 real_to_target (l, &r, mode);
22769 parts[2] = gen_int_mode (l[2], SImode);
22770 break;
22771 case DFmode:
22772 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22773 break;
22774 default:
22775 gcc_unreachable ();
22777 parts[1] = gen_int_mode (l[1], SImode);
22778 parts[0] = gen_int_mode (l[0], SImode);
22780 else
22781 gcc_unreachable ();
22784 else
22786 if (mode == TImode)
22787 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22788 if (mode == XFmode || mode == TFmode)
22790 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22791 if (REG_P (operand))
22793 gcc_assert (reload_completed);
22794 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22795 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22797 else if (offsettable_memref_p (operand))
22799 operand = adjust_address (operand, DImode, 0);
22800 parts[0] = operand;
22801 parts[1] = adjust_address (operand, upper_mode, 8);
22803 else if (GET_CODE (operand) == CONST_DOUBLE)
22805 REAL_VALUE_TYPE r;
22806 long l[4];
22808 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22809 real_to_target (l, &r, mode);
22811 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22812 if (HOST_BITS_PER_WIDE_INT >= 64)
22813 parts[0]
22814 = gen_int_mode
22815 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22816 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22817 DImode);
22818 else
22819 parts[0] = immed_double_const (l[0], l[1], DImode);
22821 if (upper_mode == SImode)
22822 parts[1] = gen_int_mode (l[2], SImode);
22823 else if (HOST_BITS_PER_WIDE_INT >= 64)
22824 parts[1]
22825 = gen_int_mode
22826 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22827 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22828 DImode);
22829 else
22830 parts[1] = immed_double_const (l[2], l[3], DImode);
22832 else
22833 gcc_unreachable ();
22837 return size;
22840 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22841 All required insns are emitted directly. Operands 2 and up are filled
22842 with the destination parts in the correct order; operands 6 and up
22843 with the corresponding source parts. */
22845 void
22846 ix86_split_long_move (rtx operands[])
22848 rtx part[2][4];
22849 int nparts, i, j;
22850 int push = 0;
22851 int collisions = 0;
22852 machine_mode mode = GET_MODE (operands[0]);
22853 bool collisionparts[4];
22855 /* The DFmode expanders may ask us to move a double.
22856 For a 64-bit target this is a single move. By hiding the fact
22857 here we simplify the i386.md splitters. */
22858 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22860 /* Optimize constant pool reference to immediates. This is used by
22861 fp moves, which force all constants to memory to allow combining. */
22863 if (MEM_P (operands[1])
22864 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22865 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22866 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22867 if (push_operand (operands[0], VOIDmode))
22869 operands[0] = copy_rtx (operands[0]);
22870 PUT_MODE (operands[0], word_mode);
22872 else
22873 operands[0] = gen_lowpart (DImode, operands[0]);
22874 operands[1] = gen_lowpart (DImode, operands[1]);
22875 emit_move_insn (operands[0], operands[1]);
22876 return;
22879 /* The only non-offsettable memory we handle is push. */
22880 if (push_operand (operands[0], VOIDmode))
22881 push = 1;
22882 else
22883 gcc_assert (!MEM_P (operands[0])
22884 || offsettable_memref_p (operands[0]));
22886 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22887 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22889 /* When emitting pushes, be careful with source operands on the stack. */
22890 if (push && MEM_P (operands[1])
22891 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22893 rtx src_base = XEXP (part[1][nparts - 1], 0);
22895 /* Compensate for the stack decrement by 4. */
22896 if (!TARGET_64BIT && nparts == 3
22897 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22898 src_base = plus_constant (Pmode, src_base, 4);
22900 /* src_base refers to the stack pointer and is
22901 automatically decreased by the emitted pushes. */
22902 for (i = 0; i < nparts; i++)
22903 part[1][i] = change_address (part[1][i],
22904 GET_MODE (part[1][i]), src_base);
22907 /* We need to do the copy in the right order in case an address register
22908 of the source overlaps the destination. */
22909 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22911 rtx tmp;
22913 for (i = 0; i < nparts; i++)
22915 collisionparts[i]
22916 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22917 if (collisionparts[i])
22918 collisions++;
22921 /* Collision in the middle part can be handled by reordering. */
22922 if (collisions == 1 && nparts == 3 && collisionparts [1])
22924 std::swap (part[0][1], part[0][2]);
22925 std::swap (part[1][1], part[1][2]);
22927 else if (collisions == 1
22928 && nparts == 4
22929 && (collisionparts [1] || collisionparts [2]))
22931 if (collisionparts [1])
22933 std::swap (part[0][1], part[0][2]);
22934 std::swap (part[1][1], part[1][2]);
22936 else
22938 std::swap (part[0][2], part[0][3]);
22939 std::swap (part[1][2], part[1][3]);
22943 /* If there are more collisions, we can't handle it by reordering.
22944 Do an lea to the last part and use only one colliding move. */
22945 else if (collisions > 1)
22947 rtx base;
22949 collisions = 1;
22951 base = part[0][nparts - 1];
22953 /* Handle the case when the last part isn't valid for lea.
22954 Happens in 64-bit mode storing the 12-byte XFmode. */
22955 if (GET_MODE (base) != Pmode)
22956 base = gen_rtx_REG (Pmode, REGNO (base));
22958 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22959 part[1][0] = replace_equiv_address (part[1][0], base);
22960 for (i = 1; i < nparts; i++)
22962 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22963 part[1][i] = replace_equiv_address (part[1][i], tmp);
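/* For pushes the parts are stored from the highest one down, since each
   push decrements the stack pointer; for ordinary moves the code below
   instead reorders the part moves so the source is not clobbered.  */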
22968 if (push)
22970 if (!TARGET_64BIT)
22972 if (nparts == 3)
22974 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22975 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22976 stack_pointer_rtx, GEN_INT (-4)));
22977 emit_move_insn (part[0][2], part[1][2]);
22979 else if (nparts == 4)
22981 emit_move_insn (part[0][3], part[1][3]);
22982 emit_move_insn (part[0][2], part[1][2]);
22985 else
22987 /* In 64-bit mode we don't have a 32-bit push available. If the operand is a
22988 register, that is OK - we just use the larger counterpart. We also
22989 retype memory - this comes from an attempt to avoid the REX prefix on
22990 moving the second half of a TFmode value. */
22991 if (GET_MODE (part[1][1]) == SImode)
22993 switch (GET_CODE (part[1][1]))
22995 case MEM:
22996 part[1][1] = adjust_address (part[1][1], DImode, 0);
22997 break;
22999 case REG:
23000 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23001 break;
23003 default:
23004 gcc_unreachable ();
23007 if (GET_MODE (part[1][0]) == SImode)
23008 part[1][0] = part[1][1];
23011 emit_move_insn (part[0][1], part[1][1]);
23012 emit_move_insn (part[0][0], part[1][0]);
23013 return;
23016 /* Choose correct order to not overwrite the source before it is copied. */
23017 if ((REG_P (part[0][0])
23018 && REG_P (part[1][1])
23019 && (REGNO (part[0][0]) == REGNO (part[1][1])
23020 || (nparts == 3
23021 && REGNO (part[0][0]) == REGNO (part[1][2]))
23022 || (nparts == 4
23023 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23024 || (collisions > 0
23025 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23027 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23029 operands[2 + i] = part[0][j];
23030 operands[6 + i] = part[1][j];
23033 else
23035 for (i = 0; i < nparts; i++)
23037 operands[2 + i] = part[0][i];
23038 operands[6 + i] = part[1][i];
23042 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23043 if (optimize_insn_for_size_p ())
23045 for (j = 0; j < nparts - 1; j++)
23046 if (CONST_INT_P (operands[6 + j])
23047 && operands[6 + j] != const0_rtx
23048 && REG_P (operands[2 + j]))
23049 for (i = j; i < nparts - 1; i++)
23050 if (CONST_INT_P (operands[7 + i])
23051 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23052 operands[7 + i] = operands[2 + j];
23055 for (i = 0; i < nparts; i++)
23056 emit_move_insn (operands[2 + i], operands[6 + i]);
23058 return;
23061 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23062 left shift by a constant, either using a single shift or
23063 a sequence of add instructions. */
23065 static void
23066 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23068 rtx (*insn)(rtx, rtx, rtx);
23070 if (count == 1
23071 || (count * ix86_cost->add <= ix86_cost->shift_const
23072 && !optimize_insn_for_size_p ()))
23074 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23075 while (count-- > 0)
23076 emit_insn (insn (operand, operand, operand));
23078 else
23080 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23081 emit_insn (insn (operand, operand, GEN_INT (count)));
23085 void
23086 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23088 rtx (*gen_ashl3)(rtx, rtx, rtx);
23089 rtx (*gen_shld)(rtx, rtx, rtx);
23090 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23092 rtx low[2], high[2];
23093 int count;
23095 if (CONST_INT_P (operands[2]))
23097 split_double_mode (mode, operands, 2, low, high);
23098 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23100 if (count >= half_width)
23102 emit_move_insn (high[0], low[1]);
23103 emit_move_insn (low[0], const0_rtx);
23105 if (count > half_width)
23106 ix86_expand_ashl_const (high[0], count - half_width, mode);
23108 else
23110 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23112 if (!rtx_equal_p (operands[0], operands[1]))
23113 emit_move_insn (operands[0], operands[1]);
23115 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23116 ix86_expand_ashl_const (low[0], count, mode);
23118 return;
23121 split_double_mode (mode, operands, 1, low, high);
23123 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23125 if (operands[1] == const1_rtx)
23127 /* Assuming we've chosen QImode-capable registers, then 1 << N
23128 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23129 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23131 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23133 ix86_expand_clear (low[0]);
23134 ix86_expand_clear (high[0]);
23135 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23137 d = gen_lowpart (QImode, low[0]);
23138 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23139 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23140 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23142 d = gen_lowpart (QImode, high[0]);
23143 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23144 s = gen_rtx_NE (QImode, flags, const0_rtx);
23145 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23148 /* Otherwise, we can get the same results by manually performing
23149 a bit extract operation on bit 5/6, and then performing the two
23150 shifts. The two methods of getting 0/1 into low/high are exactly
23151 the same size. Avoiding the shift in the bit extract case helps
23152 pentium4 a bit; no one else seems to care much either way. */
23153 else
23155 machine_mode half_mode;
23156 rtx (*gen_lshr3)(rtx, rtx, rtx);
23157 rtx (*gen_and3)(rtx, rtx, rtx);
23158 rtx (*gen_xor3)(rtx, rtx, rtx);
23159 HOST_WIDE_INT bits;
23160 rtx x;
23162 if (mode == DImode)
23164 half_mode = SImode;
23165 gen_lshr3 = gen_lshrsi3;
23166 gen_and3 = gen_andsi3;
23167 gen_xor3 = gen_xorsi3;
23168 bits = 5;
23170 else
23172 half_mode = DImode;
23173 gen_lshr3 = gen_lshrdi3;
23174 gen_and3 = gen_anddi3;
23175 gen_xor3 = gen_xordi3;
23176 bits = 6;
23179 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23180 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23181 else
23182 x = gen_lowpart (half_mode, operands[2]);
23183 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23185 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23186 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23187 emit_move_insn (low[0], high[0]);
23188 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23191 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23192 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23193 return;
23196 if (operands[1] == constm1_rtx)
23198 /* For -1 << N, we can avoid the shld instruction, because we
23199 know that we're shifting 0...31/63 ones into a -1. */
23200 emit_move_insn (low[0], constm1_rtx);
23201 if (optimize_insn_for_size_p ())
23202 emit_move_insn (high[0], low[0]);
23203 else
23204 emit_move_insn (high[0], constm1_rtx);
23206 else
23208 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23210 if (!rtx_equal_p (operands[0], operands[1]))
23211 emit_move_insn (operands[0], operands[1]);
23213 split_double_mode (mode, operands, 1, low, high);
23214 emit_insn (gen_shld (high[0], low[0], operands[2]));
23217 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
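/* The shld/shl pair above is only correct for counts below half the
   width (hardware shifts mask the count); the adjustment patterns below
   fix up larger counts, either with cmov using the cleared SCRATCH or
   with a conditional branch.  */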
23219 if (TARGET_CMOVE && scratch)
23221 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23222 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23224 ix86_expand_clear (scratch);
23225 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23227 else
23229 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23230 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23232 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23236 void
23237 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23239 rtx (*gen_ashr3)(rtx, rtx, rtx)
23240 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23241 rtx (*gen_shrd)(rtx, rtx, rtx);
23242 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23244 rtx low[2], high[2];
23245 int count;
23247 if (CONST_INT_P (operands[2]))
23249 split_double_mode (mode, operands, 2, low, high);
23250 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
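/* A shift by all bits of the mode just replicates the sign bit into both
   words; a shift by at least half the width moves the high word down and
   sign-fills the high word, with any remainder shifted within the low
   word.  */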
23252 if (count == GET_MODE_BITSIZE (mode) - 1)
23254 emit_move_insn (high[0], high[1]);
23255 emit_insn (gen_ashr3 (high[0], high[0],
23256 GEN_INT (half_width - 1)));
23257 emit_move_insn (low[0], high[0]);
23260 else if (count >= half_width)
23262 emit_move_insn (low[0], high[1]);
23263 emit_move_insn (high[0], low[0]);
23264 emit_insn (gen_ashr3 (high[0], high[0],
23265 GEN_INT (half_width - 1)));
23267 if (count > half_width)
23268 emit_insn (gen_ashr3 (low[0], low[0],
23269 GEN_INT (count - half_width)));
23271 else
23273 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23275 if (!rtx_equal_p (operands[0], operands[1]))
23276 emit_move_insn (operands[0], operands[1]);
23278 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23279 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23282 else
23284 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23286 if (!rtx_equal_p (operands[0], operands[1]))
23287 emit_move_insn (operands[0], operands[1]);
23289 split_double_mode (mode, operands, 1, low, high);
23291 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23292 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23294 if (TARGET_CMOVE && scratch)
23296 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23297 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23299 emit_move_insn (scratch, high[0]);
23300 emit_insn (gen_ashr3 (scratch, scratch,
23301 GEN_INT (half_width - 1)));
23302 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23303 scratch));
23305 else
23307 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23308 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23310 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23315 void
23316 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23318 rtx (*gen_lshr3)(rtx, rtx, rtx)
23319 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23320 rtx (*gen_shrd)(rtx, rtx, rtx);
23321 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23323 rtx low[2], high[2];
23324 int count;
23326 if (CONST_INT_P (operands[2]))
23328 split_double_mode (mode, operands, 2, low, high);
23329 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
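/* A logical shift by at least half the width moves the high word into
   the low word and clears the high word; any remainder is shifted within
   the low word.  */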
23331 if (count >= half_width)
23333 emit_move_insn (low[0], high[1]);
23334 ix86_expand_clear (high[0]);
23336 if (count > half_width)
23337 emit_insn (gen_lshr3 (low[0], low[0],
23338 GEN_INT (count - half_width)));
23340 else
23342 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23344 if (!rtx_equal_p (operands[0], operands[1]))
23345 emit_move_insn (operands[0], operands[1]);
23347 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23348 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23351 else
23353 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23355 if (!rtx_equal_p (operands[0], operands[1]))
23356 emit_move_insn (operands[0], operands[1]);
23358 split_double_mode (mode, operands, 1, low, high);
23360 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23361 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23363 if (TARGET_CMOVE && scratch)
23365 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23366 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23368 ix86_expand_clear (scratch);
23369 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23370 scratch));
23372 else
23374 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23375 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23377 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23382 /* Predict just emitted jump instruction to be taken with probability PROB. */
23383 static void
23384 predict_jump (int prob)
23386 rtx insn = get_last_insn ();
23387 gcc_assert (JUMP_P (insn));
23388 add_int_reg_note (insn, REG_BR_PROB, prob);
23391 /* Helper function for the string operations below. Test VARIABLE whether
23392 it is aligned to VALUE bytes. If so, jump to the returned label. */
23393 static rtx_code_label *
23394 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23396 rtx_code_label *label = gen_label_rtx ();
23397 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23398 if (GET_MODE (variable) == DImode)
23399 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23400 else
23401 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23402 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23403 1, label);
23404 if (epilogue)
23405 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23406 else
23407 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23408 return label;
23411 /* Decrease COUNTREG by VALUE. */
23412 static void
23413 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23415 rtx (*gen_add)(rtx, rtx, rtx)
23416 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23418 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23421 /* Zero extend a possibly SImode EXP to a Pmode register. */
rtx
23423 ix86_zero_extend_to_Pmode (rtx exp)
23425 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23428 /* Divide COUNTREG by SCALE. */
23429 static rtx
23430 scale_counter (rtx countreg, int scale)
23432 rtx sc;
23434 if (scale == 1)
23435 return countreg;
23436 if (CONST_INT_P (countreg))
23437 return GEN_INT (INTVAL (countreg) / scale);
23438 gcc_assert (REG_P (countreg));
23440 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23441 GEN_INT (exact_log2 (scale)),
23442 NULL, 1, OPTAB_DIRECT);
23443 return sc;
23446 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23447 DImode for constant loop counts. */
23449 static machine_mode
23450 counter_mode (rtx count_exp)
23452 if (GET_MODE (count_exp) != VOIDmode)
23453 return GET_MODE (count_exp);
23454 if (!CONST_INT_P (count_exp))
23455 return Pmode;
23456 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23457 return DImode;
23458 return SImode;
23461 /* Copy the address to a Pmode register. This is used for x32 to
23462 truncate DImode TLS address to a SImode register. */
23464 static rtx
23465 ix86_copy_addr_to_reg (rtx addr)
23467 rtx reg;
23468 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23470 reg = copy_addr_to_reg (addr);
23471 REG_POINTER (reg) = 1;
23472 return reg;
23474 else
23476 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23477 reg = copy_to_mode_reg (DImode, addr);
23478 REG_POINTER (reg) = 1;
23479 return gen_rtx_SUBREG (SImode, reg, 0);
23483 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23484 to DESTPTR via chunks of MODE, unrolled UNROLL times; the overall size is COUNT
23485 bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23486 the memory to VALUE (supposed to be in MODE).
23488 The size is rounded down to a whole number of chunks moved at once.
23489 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
23492 static void
23493 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23494 rtx destptr, rtx srcptr, rtx value,
23495 rtx count, machine_mode mode, int unroll,
23496 int expected_size, bool issetmem)
23498 rtx_code_label *out_label, *top_label;
23499 rtx iter, tmp;
23500 machine_mode iter_mode = counter_mode (count);
23501 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23502 rtx piece_size = GEN_INT (piece_size_n);
23503 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23504 rtx size;
23505 int i;
23507 top_label = gen_label_rtx ();
23508 out_label = gen_label_rtx ();
23509 iter = gen_reg_rtx (iter_mode);
23511 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23512 NULL, 1, OPTAB_DIRECT);
23513 /* Those two should combine. */
23514 if (piece_size == const1_rtx)
23516 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23517 true, out_label);
23518 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23520 emit_move_insn (iter, const0_rtx);
23522 emit_label (top_label);
23524 tmp = convert_modes (Pmode, iter_mode, iter, true);
23526 /* This assert could be relaxed - in that case we'll need to compute
23527 the smallest power of two containing PIECE_SIZE_N and pass it to
23528 offset_address. */
23529 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23530 destmem = offset_address (destmem, tmp, piece_size_n);
23531 destmem = adjust_address (destmem, mode, 0);
23533 if (!issetmem)
23535 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23536 srcmem = adjust_address (srcmem, mode, 0);
23538 /* When unrolling for chips that reorder memory reads and writes,
23539 we can save registers by using a single temporary.
23540 Using 4 temporaries is also overkill in 32-bit mode. */
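/* Note the "&& 0": this single-temporary variant is currently disabled,
   so copies always take the else branch below, which loads all UNROLL
   chunks into temporaries before storing them.  */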
23541 if (!TARGET_64BIT && 0)
23543 for (i = 0; i < unroll; i++)
23545 if (i)
23547 destmem =
23548 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23549 srcmem =
23550 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23552 emit_move_insn (destmem, srcmem);
23555 else
23557 rtx tmpreg[4];
23558 gcc_assert (unroll <= 4);
23559 for (i = 0; i < unroll; i++)
23561 tmpreg[i] = gen_reg_rtx (mode);
23562 if (i)
23564 srcmem =
23565 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23567 emit_move_insn (tmpreg[i], srcmem);
23569 for (i = 0; i < unroll; i++)
23571 if (i)
23573 destmem =
23574 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23576 emit_move_insn (destmem, tmpreg[i]);
23580 else
23581 for (i = 0; i < unroll; i++)
23583 if (i)
23584 destmem =
23585 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23586 emit_move_insn (destmem, value);
23589 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23590 true, OPTAB_LIB_WIDEN);
23591 if (tmp != iter)
23592 emit_move_insn (iter, tmp);
23594 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23595 true, top_label);
23596 if (expected_size != -1)
23598 expected_size /= GET_MODE_SIZE (mode) * unroll;
23599 if (expected_size == 0)
23600 predict_jump (0);
23601 else if (expected_size > REG_BR_PROB_BASE)
23602 predict_jump (REG_BR_PROB_BASE - 1);
23603 else
23604 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23606 else
23607 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23608 iter = ix86_zero_extend_to_Pmode (iter);
23609 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23610 true, OPTAB_LIB_WIDEN);
23611 if (tmp != destptr)
23612 emit_move_insn (destptr, tmp);
23613 if (!issetmem)
23615 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23616 true, OPTAB_LIB_WIDEN);
23617 if (tmp != srcptr)
23618 emit_move_insn (srcptr, tmp);
23620 emit_label (out_label);
23623 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23624 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23625 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23626 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23627 ORIG_VALUE is the original value passed to memset to fill the memory with.
23628 Other arguments have the same meaning as for the previous function. */
23630 static void
23631 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23632 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23633 rtx count,
23634 machine_mode mode, bool issetmem)
23636 rtx destexp;
23637 rtx srcexp;
23638 rtx countreg;
23639 HOST_WIDE_INT rounded_count;
23641 /* If possible, it is shorter to use rep movs.
23642 TODO: Maybe it is better to move this logic to decide_alg. */
23643 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23644 && (!issetmem || orig_value == const0_rtx))
23645 mode = SImode;
23647 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23648 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23650 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23651 GET_MODE_SIZE (mode)));
23652 if (mode != QImode)
23654 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23655 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23656 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23658 else
23659 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
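/* DESTEXP (and SRCEXP below) describe the final pointer value, i.e. the
   start address plus the number of bytes processed; the rep_stos/rep_mov
   patterns use these expressions to model the pointer register updates.  */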
23660 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23662 rounded_count = (INTVAL (count)
23663 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23664 destmem = shallow_copy_rtx (destmem);
23665 set_mem_size (destmem, rounded_count);
23667 else if (MEM_SIZE_KNOWN_P (destmem))
23668 clear_mem_size (destmem);
23670 if (issetmem)
23672 value = force_reg (mode, gen_lowpart (mode, value));
23673 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23675 else
23677 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23678 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23679 if (mode != QImode)
23681 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23682 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23683 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23685 else
23686 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23687 if (CONST_INT_P (count))
23689 rounded_count = (INTVAL (count)
23690 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23691 srcmem = shallow_copy_rtx (srcmem);
23692 set_mem_size (srcmem, rounded_count);
23694 else
23696 if (MEM_SIZE_KNOWN_P (srcmem))
23697 clear_mem_size (srcmem);
23699 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23700 destexp, srcexp));
23704 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23705 DESTMEM.
23706 SRCMEM is passed by pointer so it can be updated on return.
23707 The return value is the updated DESTMEM. */
23708 static rtx
23709 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23710 HOST_WIDE_INT size_to_move)
23712 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23713 enum insn_code code;
23714 machine_mode move_mode;
23715 int piece_size, i;
23717 /* Find the widest mode in which we could perform moves.
23718 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23719 it until a move of that size is supported. */
23720 piece_size = 1 << floor_log2 (size_to_move);
23721 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23722 code = optab_handler (mov_optab, move_mode);
23723 while (code == CODE_FOR_nothing && piece_size > 1)
23725 piece_size >>= 1;
23726 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23727 code = optab_handler (mov_optab, move_mode);
23730 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23731 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23732 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23734 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23735 move_mode = mode_for_vector (word_mode, nunits);
23736 code = optab_handler (mov_optab, move_mode);
23737 if (code == CODE_FOR_nothing)
23739 move_mode = word_mode;
23740 piece_size = GET_MODE_SIZE (move_mode);
23741 code = optab_handler (mov_optab, move_mode);
23744 gcc_assert (code != CODE_FOR_nothing);
23746 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23747 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23749 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23750 gcc_assert (size_to_move % piece_size == 0);
23751 adjust = GEN_INT (piece_size);
23752 for (i = 0; i < size_to_move; i += piece_size)
23754 /* We move from memory to memory, so we'll need to do it via
23755 a temporary register. */
23756 tempreg = gen_reg_rtx (move_mode);
23757 emit_insn (GEN_FCN (code) (tempreg, src));
23758 emit_insn (GEN_FCN (code) (dst, tempreg));
23760 emit_move_insn (destptr,
23761 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23762 emit_move_insn (srcptr,
23763 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23765 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23766 piece_size);
23767 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23768 piece_size);
23771 /* Update DST and SRC rtx. */
23772 *srcmem = src;
23773 return dst;
23776 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23777 static void
23778 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23779 rtx destptr, rtx srcptr, rtx count, int max_size)
23781 rtx src, dest;
23782 if (CONST_INT_P (count))
23784 HOST_WIDE_INT countval = INTVAL (count);
23785 HOST_WIDE_INT epilogue_size = countval % max_size;
23786 int i;
23788 /* For now MAX_SIZE should be a power of 2. This assert could be
23789 relaxed, but it'll require a bit more complicated epilogue
23790 expanding. */
23791 gcc_assert ((max_size & (max_size - 1)) == 0);
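/* Emit one move per set bit of the residual count, largest piece first;
   together these cover exactly EPILOGUE_SIZE bytes.  */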
23792 for (i = max_size; i >= 1; i >>= 1)
23794 if (epilogue_size & i)
23795 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23797 return;
23799 if (max_size > 8)
23801 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23802 count, 1, OPTAB_DIRECT);
23803 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23804 count, QImode, 1, 4, false);
23805 return;
23808 /* When there are stringops, we can cheaply increase dest and src pointers.
23809 Otherwise we save code size by maintaining an offset (zero is readily
23810 available from the preceding rep operation) and using x86 addressing modes. */
23812 if (TARGET_SINGLE_STRINGOP)
23814 if (max_size > 4)
23816 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23817 src = change_address (srcmem, SImode, srcptr);
23818 dest = change_address (destmem, SImode, destptr);
23819 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23820 emit_label (label);
23821 LABEL_NUSES (label) = 1;
23823 if (max_size > 2)
23825 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23826 src = change_address (srcmem, HImode, srcptr);
23827 dest = change_address (destmem, HImode, destptr);
23828 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23829 emit_label (label);
23830 LABEL_NUSES (label) = 1;
23832 if (max_size > 1)
23834 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23835 src = change_address (srcmem, QImode, srcptr);
23836 dest = change_address (destmem, QImode, destptr);
23837 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23838 emit_label (label);
23839 LABEL_NUSES (label) = 1;
23842 else
23844 rtx offset = force_reg (Pmode, const0_rtx);
23845 rtx tmp;
23847 if (max_size > 4)
23849 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23850 src = change_address (srcmem, SImode, srcptr);
23851 dest = change_address (destmem, SImode, destptr);
23852 emit_move_insn (dest, src);
23853 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23854 true, OPTAB_LIB_WIDEN);
23855 if (tmp != offset)
23856 emit_move_insn (offset, tmp);
23857 emit_label (label);
23858 LABEL_NUSES (label) = 1;
23860 if (max_size > 2)
23862 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23863 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23864 src = change_address (srcmem, HImode, tmp);
23865 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23866 dest = change_address (destmem, HImode, tmp);
23867 emit_move_insn (dest, src);
23868 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23869 true, OPTAB_LIB_WIDEN);
23870 if (tmp != offset)
23871 emit_move_insn (offset, tmp);
23872 emit_label (label);
23873 LABEL_NUSES (label) = 1;
23875 if (max_size > 1)
23877 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23878 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23879 src = change_address (srcmem, QImode, tmp);
23880 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23881 dest = change_address (destmem, QImode, tmp);
23882 emit_move_insn (dest, src);
23883 emit_label (label);
23884 LABEL_NUSES (label) = 1;
23889 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23890 with value PROMOTED_VAL.
23892 The return value is the updated DESTMEM. */
23893 static rtx
23894 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23895 HOST_WIDE_INT size_to_move)
23897 rtx dst = destmem, adjust;
23898 enum insn_code code;
23899 machine_mode move_mode;
23900 int piece_size, i;
23902 /* Find the widest mode in which we could perform moves.
23903 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23904 it until a move of that size is supported. */
23905 move_mode = GET_MODE (promoted_val);
23906 if (move_mode == VOIDmode)
23907 move_mode = QImode;
23908 if (size_to_move < GET_MODE_SIZE (move_mode))
23910 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23911 promoted_val = gen_lowpart (move_mode, promoted_val);
23913 piece_size = GET_MODE_SIZE (move_mode);
23914 code = optab_handler (mov_optab, move_mode);
23915 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23917 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23919 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23920 gcc_assert (size_to_move % piece_size == 0);
23921 adjust = GEN_INT (piece_size);
23922 for (i = 0; i < size_to_move; i += piece_size)
23924 if (piece_size <= GET_MODE_SIZE (word_mode))
23926 emit_insn (gen_strset (destptr, dst, promoted_val));
23927 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23928 piece_size);
23929 continue;
23932 emit_insn (GEN_FCN (code) (dst, promoted_val));
23934 emit_move_insn (destptr,
23935 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23937 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23938 piece_size);
23941 /* Update DST rtx. */
23942 return dst;
23944 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23945 static void
23946 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23947 rtx count, int max_size)
23949 count =
23950 expand_simple_binop (counter_mode (count), AND, count,
23951 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23952 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23953 gen_lowpart (QImode, value), count, QImode,
23954 1, max_size / 2, true);
23957 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23958 static void
23959 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23960 rtx count, int max_size)
23962 rtx dest;
23964 if (CONST_INT_P (count))
23966 HOST_WIDE_INT countval = INTVAL (count);
23967 HOST_WIDE_INT epilogue_size = countval % max_size;
23968 int i;
23970 /* For now MAX_SIZE should be a power of 2. This assert could be
23971 relaxed, but it'll require a bit more complicated epilogue
23972 expanding. */
23973 gcc_assert ((max_size & (max_size - 1)) == 0);
23974 for (i = max_size; i >= 1; i >>= 1)
23976 if (epilogue_size & i)
23978 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23979 destmem = emit_memset (destmem, destptr, vec_value, i);
23980 else
23981 destmem = emit_memset (destmem, destptr, value, i);
23984 return;
23986 if (max_size > 32)
23988 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23989 return;
23991 if (max_size > 16)
23993 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23994 if (TARGET_64BIT)
23996 dest = change_address (destmem, DImode, destptr);
23997 emit_insn (gen_strset (destptr, dest, value));
23998 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23999 emit_insn (gen_strset (destptr, dest, value));
24001 else
24003 dest = change_address (destmem, SImode, destptr);
24004 emit_insn (gen_strset (destptr, dest, value));
24005 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24006 emit_insn (gen_strset (destptr, dest, value));
24007 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24008 emit_insn (gen_strset (destptr, dest, value));
24009 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24010 emit_insn (gen_strset (destptr, dest, value));
24012 emit_label (label);
24013 LABEL_NUSES (label) = 1;
24015 if (max_size > 8)
24017 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24018 if (TARGET_64BIT)
24020 dest = change_address (destmem, DImode, destptr);
24021 emit_insn (gen_strset (destptr, dest, value));
24023 else
24025 dest = change_address (destmem, SImode, destptr);
24026 emit_insn (gen_strset (destptr, dest, value));
24027 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24028 emit_insn (gen_strset (destptr, dest, value));
24030 emit_label (label);
24031 LABEL_NUSES (label) = 1;
24033 if (max_size > 4)
24035 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24036 dest = change_address (destmem, SImode, destptr);
24037 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24038 emit_label (label);
24039 LABEL_NUSES (label) = 1;
24041 if (max_size > 2)
24043 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24044 dest = change_address (destmem, HImode, destptr);
24045 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24046 emit_label (label);
24047 LABEL_NUSES (label) = 1;
24049 if (max_size > 1)
24051 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24052 dest = change_address (destmem, QImode, destptr);
24053 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24054 emit_label (label);
24055 LABEL_NUSES (label) = 1;
24059 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM or store
24060 enough bytes into DESTMEM to align it to DESIRED_ALIGNMENT. The original
24061 alignment is ALIGN. Depending on ISSETMEM, either arguments SRCMEM/SRCPTR
24062 or VALUE/VEC_VALUE are ignored.
24063 The return value is the updated DESTMEM. */
24064 static rtx
24065 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24066 rtx destptr, rtx srcptr, rtx value,
24067 rtx vec_value, rtx count, int align,
24068 int desired_alignment, bool issetmem)
24070 int i;
24071 for (i = 1; i < desired_alignment; i <<= 1)
24073 if (align <= i)
24075 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24076 if (issetmem)
24078 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24079 destmem = emit_memset (destmem, destptr, vec_value, i);
24080 else
24081 destmem = emit_memset (destmem, destptr, value, i);
24083 else
24084 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24085 ix86_adjust_counter (count, i);
24086 emit_label (label);
24087 LABEL_NUSES (label) = 1;
24088 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24091 return destmem;
24094 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24095 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24096 and jump to DONE_LABEL. */
24097 static void
24098 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24099 rtx destptr, rtx srcptr,
24100 rtx value, rtx vec_value,
24101 rtx count, int size,
24102 rtx done_label, bool issetmem)
24104 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24105 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24106 rtx modesize;
24107 int n;
24109 /* If we do not have vector value to copy, we must reduce size. */
24110 if (issetmem)
24112 if (!vec_value)
24114 if (GET_MODE (value) == VOIDmode && size > 8)
24115 mode = Pmode;
24116 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24117 mode = GET_MODE (value);
24119 else
24120 mode = GET_MODE (vec_value), value = vec_value;
24122 else
24124 /* Choose appropriate vector mode. */
24125 if (size >= 32)
24126 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24127 else if (size >= 16)
24128 mode = TARGET_SSE ? V16QImode : DImode;
24129 srcmem = change_address (srcmem, mode, srcptr);
24131 destmem = change_address (destmem, mode, destptr);
24132 modesize = GEN_INT (GET_MODE_SIZE (mode));
24133 gcc_assert (GET_MODE_SIZE (mode) <= size);
24134 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24136 if (issetmem)
24137 emit_move_insn (destmem, gen_lowpart (mode, value));
24138 else
24140 emit_move_insn (destmem, srcmem);
24141 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24143 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
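/* Now copy the last SIZE bytes: for counts in SIZE..2*SIZE-1 this
   possibly overlapping tail copy covers everything the first copy
   missed.  */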
24146 destmem = offset_address (destmem, count, 1);
24147 destmem = offset_address (destmem, GEN_INT (-2 * size),
24148 GET_MODE_SIZE (mode));
24149 if (!issetmem)
24151 srcmem = offset_address (srcmem, count, 1);
24152 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24153 GET_MODE_SIZE (mode));
24155 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24157 if (issetmem)
24158 emit_move_insn (destmem, gen_lowpart (mode, value));
24159 else
24161 emit_move_insn (destmem, srcmem);
24162 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24164 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24166 emit_jump_insn (gen_jump (done_label));
24167 emit_barrier ();
24169 emit_label (label);
24170 LABEL_NUSES (label) = 1;
24173 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24174 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24175 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
24176 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24177 DONE_LABEL is a label after the whole copying sequence. The label is created
24178 on demand if *DONE_LABEL is NULL.
24179 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for
24180 new bounds after the initial copies.
24182 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24183 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24184 we will dispatch to a library call for large blocks.
24186 In pseudocode we do:
24188 if (COUNT < SIZE)
24190 Assume that SIZE is 4. Bigger sizes are handled analogously
24191 if (COUNT & 4)
24193 copy 4 bytes from SRCPTR to DESTPTR
24194 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24195 goto done_label
24197 if (!COUNT)
24198 goto done_label;
24199 copy 1 byte from SRCPTR to DESTPTR
24200 if (COUNT & 2)
24202 copy 2 bytes from SRCPTR to DESTPTR
24203 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24206 else
24208 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24209 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24211 OLD_DESTPTR = DESTPTR;
24212 Align DESTPTR up to DESIRED_ALIGN
24213 SRCPTR += DESTPTR - OLD_DESTPTR
24214 COUNT -= DESTPTR - OLD_DESTPTR
24215 if (DYNAMIC_CHECK)
24216 Round COUNT down to multiple of SIZE
24217 << optional caller supplied zero size guard is here >>
24218 << optional caller supplied dynamic check is here >>
24219 << caller supplied main copy loop is here >>
24221 done_label:
24223 static void
24224 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24225 rtx *destptr, rtx *srcptr,
24226 machine_mode mode,
24227 rtx value, rtx vec_value,
24228 rtx *count,
24229 rtx_code_label **done_label,
24230 int size,
24231 int desired_align,
24232 int align,
24233 unsigned HOST_WIDE_INT *min_size,
24234 bool dynamic_check,
24235 bool issetmem)
24237 rtx_code_label *loop_label = NULL, *label;
24238 int n;
24239 rtx modesize;
24240 int prolog_size = 0;
24241 rtx mode_value;
24243 /* Choose the proper value to copy. */
24244 if (issetmem && VECTOR_MODE_P (mode))
24245 mode_value = vec_value;
24246 else
24247 mode_value = value;
24248 gcc_assert (GET_MODE_SIZE (mode) <= size);
24250 /* See if block is big or small, handle small blocks. */
24251 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24253 int size2 = size;
24254 loop_label = gen_label_rtx ();
24256 if (!*done_label)
24257 *done_label = gen_label_rtx ();
24259 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24260 1, loop_label);
24261 size2 >>= 1;
24263 /* Handle sizes > 3. */
24264 for (;size2 > 2; size2 >>= 1)
24265 expand_small_movmem_or_setmem (destmem, srcmem,
24266 *destptr, *srcptr,
24267 value, vec_value,
24268 *count,
24269 size2, *done_label, issetmem);
24270 /* Nothing to copy? Jump to DONE_LABEL if so. */
24271 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24272 1, *done_label);
24274 /* Do a byte copy. */
24275 destmem = change_address (destmem, QImode, *destptr);
24276 if (issetmem)
24277 emit_move_insn (destmem, gen_lowpart (QImode, value));
24278 else
24280 srcmem = change_address (srcmem, QImode, *srcptr);
24281 emit_move_insn (destmem, srcmem);
24284 /* Handle sizes 2 and 3. */
24285 label = ix86_expand_aligntest (*count, 2, false);
24286 destmem = change_address (destmem, HImode, *destptr);
24287 destmem = offset_address (destmem, *count, 1);
24288 destmem = offset_address (destmem, GEN_INT (-2), 2);
24289 if (issetmem)
24290 emit_move_insn (destmem, gen_lowpart (HImode, value));
24291 else
24293 srcmem = change_address (srcmem, HImode, *srcptr);
24294 srcmem = offset_address (srcmem, *count, 1);
24295 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24296 emit_move_insn (destmem, srcmem);
24299 emit_label (label);
24300 LABEL_NUSES (label) = 1;
24301 emit_jump_insn (gen_jump (*done_label));
24302 emit_barrier ();
24304 else
24305 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24306 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24308 /* Start memcpy for COUNT >= SIZE. */
24309 if (loop_label)
24311 emit_label (loop_label);
24312 LABEL_NUSES (loop_label) = 1;
24315 /* Copy first desired_align bytes. */
24316 if (!issetmem)
24317 srcmem = change_address (srcmem, mode, *srcptr);
24318 destmem = change_address (destmem, mode, *destptr);
24319 modesize = GEN_INT (GET_MODE_SIZE (mode));
24320 for (n = 0; prolog_size < desired_align - align; n++)
24322 if (issetmem)
24323 emit_move_insn (destmem, mode_value);
24324 else
24326 emit_move_insn (destmem, srcmem);
24327 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24329 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24330 prolog_size += GET_MODE_SIZE (mode);
24334 /* Copy last SIZE bytes. */
24335 destmem = offset_address (destmem, *count, 1);
24336 destmem = offset_address (destmem,
24337 GEN_INT (-size - prolog_size),
24339 if (issetmem)
24340 emit_move_insn (destmem, mode_value);
24341 else
24343 srcmem = offset_address (srcmem, *count, 1);
24344 srcmem = offset_address (srcmem,
24345 GEN_INT (-size - prolog_size),
24347 emit_move_insn (destmem, srcmem);
24349 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24351 destmem = offset_address (destmem, modesize, 1);
24352 if (issetmem)
24353 emit_move_insn (destmem, mode_value);
24354 else
24356 srcmem = offset_address (srcmem, modesize, 1);
24357 emit_move_insn (destmem, srcmem);
24361 /* Align destination. */
24362 if (desired_align > 1 && desired_align > align)
24364 rtx saveddest = *destptr;
24366 gcc_assert (desired_align <= size);
24367 /* Align destptr up, place it to new register. */
24368 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24369 GEN_INT (prolog_size),
24370 NULL_RTX, 1, OPTAB_DIRECT);
24371 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24372 REG_POINTER (*destptr) = 1;
24373 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24374 GEN_INT (-desired_align),
24375 *destptr, 1, OPTAB_DIRECT);
24376 /* See how many bytes we skipped. */
24377 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24378 *destptr,
24379 saveddest, 1, OPTAB_DIRECT);
24380 /* Adjust srcptr and count. */
24381 if (!issetmem)
24382 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24383 saveddest, *srcptr, 1, OPTAB_DIRECT);
24384 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24385 saveddest, *count, 1, OPTAB_DIRECT);
24386 /* We copied at most size + prolog_size. */
24387 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24388 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24389 else
24390 *min_size = 0;
24392 /* Our loops always round down the block size, but for dispatch to the library
24393 we need the precise value. */
24394 if (dynamic_check)
24395 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24396 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24398 else
24400 gcc_assert (prolog_size == 0);
24401 /* Decrease count, so we won't end up copying last word twice. */
24402 if (!CONST_INT_P (*count))
24403 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24404 constm1_rtx, *count, 1, OPTAB_DIRECT);
24405 else
24406 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24407 if (*min_size)
24408 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24413 /* This function is like the previous one, except here we know how many bytes
24414 need to be copied. That allows us to update alignment not only of DST, which
24415 is returned, but also of SRC, which is passed as a pointer for that
24416 reason. */
24417 static rtx
24418 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24419 rtx srcreg, rtx value, rtx vec_value,
24420 int desired_align, int align_bytes,
24421 bool issetmem)
24423 rtx src = NULL;
24424 rtx orig_dst = dst;
24425 rtx orig_src = NULL;
24426 int piece_size = 1;
24427 int copied_bytes = 0;
24429 if (!issetmem)
24431 gcc_assert (srcp != NULL);
24432 src = *srcp;
24433 orig_src = src;
24436 for (piece_size = 1;
24437 piece_size <= desired_align && copied_bytes < align_bytes;
24438 piece_size <<= 1)
24440 if (align_bytes & piece_size)
24442 if (issetmem)
24444 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24445 dst = emit_memset (dst, destreg, vec_value, piece_size);
24446 else
24447 dst = emit_memset (dst, destreg, value, piece_size);
24449 else
24450 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24451 copied_bytes += piece_size;
24454 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24455 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24456 if (MEM_SIZE_KNOWN_P (orig_dst))
24457 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24459 if (!issetmem)
24461 int src_align_bytes = get_mem_align_offset (src, desired_align
24462 * BITS_PER_UNIT);
24463 if (src_align_bytes >= 0)
24464 src_align_bytes = desired_align - src_align_bytes;
24465 if (src_align_bytes >= 0)
24467 unsigned int src_align;
24468 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24470 if ((src_align_bytes & (src_align - 1))
24471 == (align_bytes & (src_align - 1)))
24472 break;
24474 if (src_align > (unsigned int) desired_align)
24475 src_align = desired_align;
24476 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24477 set_mem_align (src, src_align * BITS_PER_UNIT);
24479 if (MEM_SIZE_KNOWN_P (orig_src))
24480 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24481 *srcp = src;
24484 return dst;
24487 /* Return true if ALG can be used in current context.
24488 Assume we expand memset if MEMSET is true. */
24489 static bool
24490 alg_usable_p (enum stringop_alg alg, bool memset)
24492 if (alg == no_stringop)
24493 return false;
24494 if (alg == vector_loop)
24495 return TARGET_SSE || TARGET_AVX;
24496 /* Algorithms using the rep prefix want at least edi and ecx;
24497 additionally, memset wants eax and memcpy wants esi. Don't
24498 consider such algorithms if the user has appropriated those
24499 registers for their own purposes. */
24500 if (alg == rep_prefix_1_byte
24501 || alg == rep_prefix_4_byte
24502 || alg == rep_prefix_8_byte)
24503 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24504 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24505 return true;
24508 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24509 static enum stringop_alg
24510 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24511 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24512 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24514 const struct stringop_algs * algs;
24515 bool optimize_for_speed;
24516 int max = 0;
24517 const struct processor_costs *cost;
24518 int i;
24519 bool any_alg_usable_p = false;
24521 *noalign = false;
24522 *dynamic_check = -1;
24524 /* Even if the string operation call is cold, we still might spend a lot
24525 of time processing large blocks. */
24526 if (optimize_function_for_size_p (cfun)
24527 || (optimize_insn_for_size_p ()
24528 && (max_size < 256
24529 || (expected_size != -1 && expected_size < 256))))
24530 optimize_for_speed = false;
24531 else
24532 optimize_for_speed = true;
24534 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24535 if (memset)
24536 algs = &cost->memset[TARGET_64BIT != 0];
24537 else
24538 algs = &cost->memcpy[TARGET_64BIT != 0];
24540 /* Find the maximal size for which a non-libcall algorithm is specified. */
24541 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24543 enum stringop_alg candidate = algs->size[i].alg;
24544 bool usable = alg_usable_p (candidate, memset);
24545 any_alg_usable_p |= usable;
24547 if (candidate != libcall && candidate && usable)
24548 max = algs->size[i].max;
24551 /* If the expected size is not known but the max size is small enough
24552 that the inline version is a win, set the expected size into
24553 the range. */
24554 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24555 && expected_size == -1)
24556 expected_size = min_size / 2 + max_size / 2;
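/* Worked example (illustrative only): with MIN_SIZE == 0 and MAX_SIZE == 200
   this picks 0/2 + 200/2 == 100 as the expected size.  The two halves are
   divided separately, presumably so the sum cannot overflow when MAX_SIZE is
   the "unknown" value of (unsigned HOST_WIDE_INT) -1.  */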
24558 /* If the user specified the algorithm, honor it if possible. */
24559 if (ix86_stringop_alg != no_stringop
24560 && alg_usable_p (ix86_stringop_alg, memset))
24561 return ix86_stringop_alg;
24562 /* rep; movq or rep; movl is the smallest variant. */
24563 else if (!optimize_for_speed)
24565 *noalign = true;
24566 if (!count || (count & 3) || (memset && !zero_memset))
24567 return alg_usable_p (rep_prefix_1_byte, memset)
24568 ? rep_prefix_1_byte : loop_1_byte;
24569 else
24570 return alg_usable_p (rep_prefix_4_byte, memset)
24571 ? rep_prefix_4_byte : loop;
24573 /* Very tiny blocks are best handled via the loop; REP is expensive to
24574 set up. */
24575 else if (expected_size != -1 && expected_size < 4)
24576 return loop_1_byte;
24577 else if (expected_size != -1)
24579 enum stringop_alg alg = libcall;
24580 bool alg_noalign = false;
24581 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24583 /* We get here if the algorithms that were not libcall-based
24584 were rep-prefix based and we are unable to use rep prefixes
24585 based on global register usage. Break out of the loop and
24586 use the heuristic below. */
24587 if (algs->size[i].max == 0)
24588 break;
24589 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24591 enum stringop_alg candidate = algs->size[i].alg;
24593 if (candidate != libcall && alg_usable_p (candidate, memset))
24595 alg = candidate;
24596 alg_noalign = algs->size[i].noalign;
24598 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24599 last non-libcall inline algorithm. */
24600 if (TARGET_INLINE_ALL_STRINGOPS)
24602 /* When the current size is best copied by a libcall,
24603 but we are still forced to inline, run the heuristic below
24604 that will pick code for medium-sized blocks. */
24605 if (alg != libcall)
24607 *noalign = alg_noalign;
24608 return alg;
24610 else if (!any_alg_usable_p)
24611 break;
24613 else if (alg_usable_p (candidate, memset))
24615 *noalign = algs->size[i].noalign;
24616 return candidate;
24621 /* When asked to inline the call anyway, try to pick a meaningful choice.
24622 We look for the maximal size of block that is faster to copy by hand and
24623 take blocks of at most that size, guessing that the average size will
24624 be roughly half of the block.
24626 If this turns out to be bad, we might simply specify the preferred
24627 choice in ix86_costs. */
24628 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24629 && (algs->unknown_size == libcall
24630 || !alg_usable_p (algs->unknown_size, memset)))
24632 enum stringop_alg alg;
24634 /* If there aren't any usable algorithms, then recursing on
24635 smaller sizes isn't going to find anything. Just return the
24636 simple byte-at-a-time copy loop. */
24637 if (!any_alg_usable_p)
24639 /* Pick something reasonable. */
24640 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24641 *dynamic_check = 128;
24642 return loop_1_byte;
24644 if (max <= 0)
24645 max = 4096;
24646 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24647 zero_memset, dynamic_check, noalign);
24648 gcc_assert (*dynamic_check == -1);
24649 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24650 *dynamic_check = max;
24651 else
24652 gcc_assert (alg != libcall);
24653 return alg;
24655 return (alg_usable_p (algs->unknown_size, memset)
24656 ? algs->unknown_size : libcall);
24659 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24660 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24661 static int
24662 decide_alignment (int align,
24663 enum stringop_alg alg,
24664 int expected_size,
24665 machine_mode move_mode)
24667 int desired_align = 0;
24669 gcc_assert (alg != no_stringop);
24671 if (alg == libcall)
24672 return 0;
24673 if (move_mode == VOIDmode)
24674 return 0;
24676 desired_align = GET_MODE_SIZE (move_mode);
24677 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24678 copying a whole cache line at once. */
24679 if (TARGET_PENTIUMPRO
24680 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24681 desired_align = 8;
24683 if (optimize_size)
24684 desired_align = 1;
24685 if (desired_align < align)
24686 desired_align = align;
24687 if (expected_size != -1 && expected_size < 4)
24688 desired_align = align;
24690 return desired_align;
24694 /* Helper function for memset. For the QImode value 0xXY produce
24695 0xXYXYXYXY of the width specified by MODE. This is essentially
24696 a multiplication by 0x01010101..., but we can do slightly better than
24697 synth_mult by unwinding the sequence by hand on CPUs with a
24698 slow multiply. */
24699 static rtx
24700 promote_duplicated_reg (machine_mode mode, rtx val)
24702 machine_mode valmode = GET_MODE (val);
24703 rtx tmp;
24704 int nops = mode == DImode ? 3 : 2;
24706 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24707 if (val == const0_rtx)
24708 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24709 if (CONST_INT_P (val))
24711 HOST_WIDE_INT v = INTVAL (val) & 255;
24713 v |= v << 8;
24714 v |= v << 16;
24715 if (mode == DImode)
24716 v |= (v << 16) << 16;
24717 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24720 if (valmode == VOIDmode)
24721 valmode = QImode;
24722 if (valmode != QImode)
24723 val = gen_lowpart (QImode, val);
24724 if (mode == QImode)
24725 return val;
24726 if (!TARGET_PARTIAL_REG_STALL)
24727 nops--;
24728 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24729 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24730 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24731 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24733 rtx reg = convert_modes (mode, QImode, val, true);
24734 tmp = promote_duplicated_reg (mode, const1_rtx);
24735 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24736 OPTAB_DIRECT);
24738 else
24740 rtx reg = convert_modes (mode, QImode, val, true);
24742 if (!TARGET_PARTIAL_REG_STALL)
24743 if (mode == SImode)
24744 emit_insn (gen_movsi_insv_1 (reg, reg));
24745 else
24746 emit_insn (gen_movdi_insv_1 (reg, reg));
24747 else
24749 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24750 NULL, 1, OPTAB_DIRECT);
24751 reg =
24752 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24754 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24755 NULL, 1, OPTAB_DIRECT);
24756 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24757 if (mode == SImode)
24758 return reg;
24759 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24760 NULL, 1, OPTAB_DIRECT);
24761 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24762 return reg;
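/* An illustrative sketch of the same byte duplication on plain integers
   (not compiler code), matching the shift/or sequence emitted above for
   CPUs where the multiply would be slower.  The name is made up.  */
#if 0
static unsigned long long
duplicate_byte_sketch (unsigned char byte)
{
  unsigned long long v = byte;   /* 0x00000000000000XY */
  v |= v << 8;                   /* 0x000000000000XYXY */
  v |= v << 16;                  /* 0x00000000XYXYXYXY */
  v |= v << 32;                  /* 0xXYXYXYXYXYXYXYXY */
  return v;
}
#endif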
24766 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
24767 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
24768 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
24769 static rtx
24770 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24771 int align)
24773 rtx promoted_val;
24775 if (TARGET_64BIT
24776 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24777 promoted_val = promote_duplicated_reg (DImode, val);
24778 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24779 promoted_val = promote_duplicated_reg (SImode, val);
24780 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24781 promoted_val = promote_duplicated_reg (HImode, val);
24782 else
24783 promoted_val = val;
24785 return promoted_val;
24788 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24789 operations when profitable. The code depends upon architecture, block size
24790 and alignment, but always has one of the following overall structures:
24792 Aligned move sequence:
24794 1) Prologue guard: Conditional that jumps up to epilogues for small
24795 blocks that can be handled by the epilogue alone. This is faster,
24796 but also needed for correctness, since the prologue assumes the block
24797 is larger than the desired alignment.
24799 Optional dynamic check for size and libcall for large
24800 blocks is emitted here too, with -minline-stringops-dynamically.
24802 2) Prologue: copy first few bytes in order to get destination
24803 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24804 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24805 copied. We emit either a jump tree on power of two sized
24806 blocks, or a byte loop.
24808 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24809 with specified algorithm.
24811 4) Epilogue: code copying tail of the block that is too small to be
24812 handled by main body (or up to size guarded by prologue guard).
24814 Misaligned move sequence:
24816 1) Misaligned move prologue/epilogue containing:
24817 a) Prologue handling small memory blocks and jumping to done_label
24818 (skipped if blocks are known to be large enough)
24819 b) Single possibly misaligned move copying the first DESIRED_ALIGN-ALIGN
24820 bytes if alignment is needed
24821 (skipped if alignment is not needed)
24822 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24824 2) Zero size guard dispatching to done_label, if needed
24826 3) Dispatch to a library call, if needed
24828 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24829 with specified algorithm. */
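/* An illustrative sketch (not compiler code) of the aligned sequence
   described above, written as plain C for a byte-granular memset.
   All names here are made up for the example; DESIRED_ALIGN is assumed
   to be a nonzero power of two.  */
#if 0
static void
aligned_memset_shape (unsigned char *dst, unsigned char val,
                      unsigned long count, unsigned long desired_align)
{
  unsigned char *end = dst + count;

  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (count < 2 * desired_align)
    goto epilogue;
  /* 2) Prologue: store bytes until DST is aligned.  */
  while ((unsigned long) dst % desired_align)
    *dst++ = val;
  /* 3) Main body: aligned chunks of DESIRED_ALIGN bytes.  */
  while ((unsigned long) (end - dst) >= desired_align)
    for (unsigned long i = 0; i < desired_align; i++)
      *dst++ = val;
 epilogue:
  /* 4) Epilogue: whatever tail is left.  */
  while (dst < end)
    *dst++ = val;
}
#endif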
24830 bool
24831 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24832 rtx align_exp, rtx expected_align_exp,
24833 rtx expected_size_exp, rtx min_size_exp,
24834 rtx max_size_exp, rtx probable_max_size_exp,
24835 bool issetmem)
24837 rtx destreg;
24838 rtx srcreg = NULL;
24839 rtx_code_label *label = NULL;
24840 rtx tmp;
24841 rtx_code_label *jump_around_label = NULL;
24842 HOST_WIDE_INT align = 1;
24843 unsigned HOST_WIDE_INT count = 0;
24844 HOST_WIDE_INT expected_size = -1;
24845 int size_needed = 0, epilogue_size_needed;
24846 int desired_align = 0, align_bytes = 0;
24847 enum stringop_alg alg;
24848 rtx promoted_val = NULL;
24849 rtx vec_promoted_val = NULL;
24850 bool force_loopy_epilogue = false;
24851 int dynamic_check;
24852 bool need_zero_guard = false;
24853 bool noalign;
24854 machine_mode move_mode = VOIDmode;
24855 int unroll_factor = 1;
24856 /* TODO: Once value ranges are available, fill in proper data. */
24857 unsigned HOST_WIDE_INT min_size = 0;
24858 unsigned HOST_WIDE_INT max_size = -1;
24859 unsigned HOST_WIDE_INT probable_max_size = -1;
24860 bool misaligned_prologue_used = false;
24862 if (CONST_INT_P (align_exp))
24863 align = INTVAL (align_exp);
24864 /* i386 can do misaligned accesses at a reasonably increased cost. */
24865 if (CONST_INT_P (expected_align_exp)
24866 && INTVAL (expected_align_exp) > align)
24867 align = INTVAL (expected_align_exp);
24868 /* ALIGN is the minimum of destination and source alignment, but we care here
24869 just about destination alignment. */
24870 else if (!issetmem
24871 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24872 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24874 if (CONST_INT_P (count_exp))
24876 min_size = max_size = probable_max_size = count = expected_size
24877 = INTVAL (count_exp);
24878 /* When COUNT is 0, there is nothing to do. */
24879 if (!count)
24880 return true;
24882 else
24884 if (min_size_exp)
24885 min_size = INTVAL (min_size_exp);
24886 if (max_size_exp)
24887 max_size = INTVAL (max_size_exp);
24888 if (probable_max_size_exp)
24889 probable_max_size = INTVAL (probable_max_size_exp);
24890 if (CONST_INT_P (expected_size_exp))
24891 expected_size = INTVAL (expected_size_exp);
24894 /* Make sure we don't need to care about overflow later on. */
24895 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24896 return false;
24898 /* Step 0: Decide on preferred algorithm, desired alignment and
24899 size of chunks to be copied by main loop. */
24900 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24901 issetmem,
24902 issetmem && val_exp == const0_rtx,
24903 &dynamic_check, &noalign);
24904 if (alg == libcall)
24905 return false;
24906 gcc_assert (alg != no_stringop);
24908 /* For now the vector version of memset is generated only for memory zeroing, as
24909 creating the promoted vector value is very cheap in this case. */
24910 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24911 alg = unrolled_loop;
24913 if (!count)
24914 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24915 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24916 if (!issetmem)
24917 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24919 unroll_factor = 1;
24920 move_mode = word_mode;
24921 switch (alg)
24923 case libcall:
24924 case no_stringop:
24925 case last_alg:
24926 gcc_unreachable ();
24927 case loop_1_byte:
24928 need_zero_guard = true;
24929 move_mode = QImode;
24930 break;
24931 case loop:
24932 need_zero_guard = true;
24933 break;
24934 case unrolled_loop:
24935 need_zero_guard = true;
24936 unroll_factor = (TARGET_64BIT ? 4 : 2);
24937 break;
24938 case vector_loop:
24939 need_zero_guard = true;
24940 unroll_factor = 4;
24941 /* Find the widest supported mode. */
24942 move_mode = word_mode;
24943 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24944 != CODE_FOR_nothing)
24945 move_mode = GET_MODE_WIDER_MODE (move_mode);
24947 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24948 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24949 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24951 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24952 move_mode = mode_for_vector (word_mode, nunits);
24953 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24954 move_mode = word_mode;
24956 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24957 break;
24958 case rep_prefix_8_byte:
24959 move_mode = DImode;
24960 break;
24961 case rep_prefix_4_byte:
24962 move_mode = SImode;
24963 break;
24964 case rep_prefix_1_byte:
24965 move_mode = QImode;
24966 break;
24968 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24969 epilogue_size_needed = size_needed;
24971 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24972 if (!TARGET_ALIGN_STRINGOPS || noalign)
24973 align = desired_align;
24975 /* Step 1: Prologue guard. */
24977 /* Alignment code needs count to be in register. */
24978 if (CONST_INT_P (count_exp) && desired_align > align)
24980 if (INTVAL (count_exp) > desired_align
24981 && INTVAL (count_exp) > size_needed)
24983 align_bytes
24984 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24985 if (align_bytes <= 0)
24986 align_bytes = 0;
24987 else
24988 align_bytes = desired_align - align_bytes;
24990 if (align_bytes == 0)
24991 count_exp = force_reg (counter_mode (count_exp), count_exp);
24993 gcc_assert (desired_align >= 1 && align >= 1);
24995 /* Misaligned move sequences handle both the prologue and epilogue at once.
24996 Default code generation results in smaller code for large alignments
24997 and also avoids redundant work when sizes are known precisely. */
24998 misaligned_prologue_used
24999 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25000 && MAX (desired_align, epilogue_size_needed) <= 32
25001 && desired_align <= epilogue_size_needed
25002 && ((desired_align > align && !align_bytes)
25003 || (!count && epilogue_size_needed > 1)));
25005 /* Do the cheap promotion to allow better CSE across the
25006 main loop and epilogue (i.e. one load of the big constant in
25007 front of all the code).
25008 For now the misaligned move sequences do not have a fast path
25009 without broadcasting. */
25010 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25012 if (alg == vector_loop)
25014 gcc_assert (val_exp == const0_rtx);
25015 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25016 promoted_val = promote_duplicated_reg_to_size (val_exp,
25017 GET_MODE_SIZE (word_mode),
25018 desired_align, align);
25020 else
25022 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25023 desired_align, align);
25026 /* Misaligned move sequences handle both prologues and epilogues at once.
25027 Default code generation results in smaller code for large alignments and
25028 also avoids redundant work when sizes are known precisely. */
25029 if (misaligned_prologue_used)
25031 /* Misaligned move prologue handled small blocks by itself. */
25032 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25033 (dst, src, &destreg, &srcreg,
25034 move_mode, promoted_val, vec_promoted_val,
25035 &count_exp,
25036 &jump_around_label,
25037 desired_align < align
25038 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25039 desired_align, align, &min_size, dynamic_check, issetmem);
25040 if (!issetmem)
25041 src = change_address (src, BLKmode, srcreg);
25042 dst = change_address (dst, BLKmode, destreg);
25043 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25044 epilogue_size_needed = 0;
25045 if (need_zero_guard && !min_size)
25047 /* It is possible that we copied enough so the main loop will not
25048 execute. */
25049 gcc_assert (size_needed > 1);
25050 if (jump_around_label == NULL_RTX)
25051 jump_around_label = gen_label_rtx ();
25052 emit_cmp_and_jump_insns (count_exp,
25053 GEN_INT (size_needed),
25054 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25055 if (expected_size == -1
25056 || expected_size < (desired_align - align) / 2 + size_needed)
25057 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25058 else
25059 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25062 /* Ensure that alignment prologue won't copy past end of block. */
25063 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25065 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25066 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
25067 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
25068 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
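/* Worked example (illustrative only): with SIZE_NEEDED == 16 and
   DESIRED_ALIGN - ALIGN == 12, the MAX above is 15, floor_log2 (15) == 3,
   so EPILOGUE_SIZE_NEEDED becomes 1 << 4 == 16.  */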
25070 /* To improve performance on small blocks, we jump around the VAL
25071 promoting code. This means that if the promoted VAL is not constant,
25072 we might not use it in the epilogue and have to use the byte
25073 loop variant. */
25074 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25075 force_loopy_epilogue = true;
25076 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25077 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25079 /* If main algorithm works on QImode, no epilogue is needed.
25080 For small sizes just don't align anything. */
25081 if (size_needed == 1)
25082 desired_align = align;
25083 else
25084 goto epilogue;
25086 else if (!count
25087 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25089 label = gen_label_rtx ();
25090 emit_cmp_and_jump_insns (count_exp,
25091 GEN_INT (epilogue_size_needed),
25092 LTU, 0, counter_mode (count_exp), 1, label);
25093 if (expected_size == -1 || expected_size < epilogue_size_needed)
25094 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25095 else
25096 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25100 /* Emit code to decide at runtime whether a library call or inline code
25101 should be used. */
25102 if (dynamic_check != -1)
25104 if (!issetmem && CONST_INT_P (count_exp))
25106 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25108 emit_block_move_via_libcall (dst, src, count_exp, false);
25109 count_exp = const0_rtx;
25110 goto epilogue;
25113 else
25115 rtx_code_label *hot_label = gen_label_rtx ();
25116 if (jump_around_label == NULL_RTX)
25117 jump_around_label = gen_label_rtx ();
25118 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25119 LEU, 0, counter_mode (count_exp),
25120 1, hot_label);
25121 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25122 if (issetmem)
25123 set_storage_via_libcall (dst, count_exp, val_exp, false);
25124 else
25125 emit_block_move_via_libcall (dst, src, count_exp, false);
25126 emit_jump (jump_around_label);
25127 emit_label (hot_label);
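/* An illustrative sketch (not literal compiler output) of the dispatch
   emitted above for memset, with DYNAMIC_CHECK the cutoff picked by
   decide_alg:

       if (count <= dynamic_check - 1)
         ... inline expansion (the predicted-hot path) ...
       else
         memset (dst, val, count);    -- library call for large blocks

   The memcpy case calls emit_block_move_via_libcall instead.  */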
25131 /* Step 2: Alignment prologue. */
25132 /* Do the expensive promotion once we branched off the small blocks. */
25133 if (issetmem && !promoted_val)
25134 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25135 desired_align, align);
25137 if (desired_align > align && !misaligned_prologue_used)
25139 if (align_bytes == 0)
25141 /* Except for the first move in the prologue, we no longer know
25142 the constant offset in the aliasing info. It doesn't seem worth
25143 the pain to maintain it for the first move, so throw away
25144 the info early. */
25145 dst = change_address (dst, BLKmode, destreg);
25146 if (!issetmem)
25147 src = change_address (src, BLKmode, srcreg);
25148 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25149 promoted_val, vec_promoted_val,
25150 count_exp, align, desired_align,
25151 issetmem);
25152 /* At most desired_align - align bytes are copied. */
25153 if (min_size < (unsigned)(desired_align - align))
25154 min_size = 0;
25155 else
25156 min_size -= desired_align - align;
25158 else
25160 /* If we know how many bytes need to be stored before dst is
25161 sufficiently aligned, maintain aliasing info accurately. */
25162 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25163 srcreg,
25164 promoted_val,
25165 vec_promoted_val,
25166 desired_align,
25167 align_bytes,
25168 issetmem);
25170 count_exp = plus_constant (counter_mode (count_exp),
25171 count_exp, -align_bytes);
25172 count -= align_bytes;
25173 min_size -= align_bytes;
25174 max_size -= align_bytes;
25176 if (need_zero_guard
25177 && !min_size
25178 && (count < (unsigned HOST_WIDE_INT) size_needed
25179 || (align_bytes == 0
25180 && count < ((unsigned HOST_WIDE_INT) size_needed
25181 + desired_align - align))))
25183 /* It is possible that we copied enough so the main loop will not
25184 execute. */
25185 gcc_assert (size_needed > 1);
25186 if (label == NULL_RTX)
25187 label = gen_label_rtx ();
25188 emit_cmp_and_jump_insns (count_exp,
25189 GEN_INT (size_needed),
25190 LTU, 0, counter_mode (count_exp), 1, label);
25191 if (expected_size == -1
25192 || expected_size < (desired_align - align) / 2 + size_needed)
25193 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25194 else
25195 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25198 if (label && size_needed == 1)
25200 emit_label (label);
25201 LABEL_NUSES (label) = 1;
25202 label = NULL;
25203 epilogue_size_needed = 1;
25204 if (issetmem)
25205 promoted_val = val_exp;
25207 else if (label == NULL_RTX && !misaligned_prologue_used)
25208 epilogue_size_needed = size_needed;
25210 /* Step 3: Main loop. */
25212 switch (alg)
25214 case libcall:
25215 case no_stringop:
25216 case last_alg:
25217 gcc_unreachable ();
25218 case loop_1_byte:
25219 case loop:
25220 case unrolled_loop:
25221 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25222 count_exp, move_mode, unroll_factor,
25223 expected_size, issetmem);
25224 break;
25225 case vector_loop:
25226 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25227 vec_promoted_val, count_exp, move_mode,
25228 unroll_factor, expected_size, issetmem);
25229 break;
25230 case rep_prefix_8_byte:
25231 case rep_prefix_4_byte:
25232 case rep_prefix_1_byte:
25233 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25234 val_exp, count_exp, move_mode, issetmem);
25235 break;
25237 /* Adjust properly the offset of src and dest memory for aliasing. */
25238 if (CONST_INT_P (count_exp))
25240 if (!issetmem)
25241 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25242 (count / size_needed) * size_needed);
25243 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25244 (count / size_needed) * size_needed);
25246 else
25248 if (!issetmem)
25249 src = change_address (src, BLKmode, srcreg);
25250 dst = change_address (dst, BLKmode, destreg);
25253 /* Step 4: Epilogue to copy the remaining bytes. */
25254 epilogue:
25255 if (label)
25257 /* When the main loop is done, COUNT_EXP might hold the original count,
25258 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25259 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25260 bytes. Compensate if needed. */
25262 if (size_needed < epilogue_size_needed)
25264 tmp =
25265 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25266 GEN_INT (size_needed - 1), count_exp, 1,
25267 OPTAB_DIRECT);
25268 if (tmp != count_exp)
25269 emit_move_insn (count_exp, tmp);
25271 emit_label (label);
25272 LABEL_NUSES (label) = 1;
25275 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25277 if (force_loopy_epilogue)
25278 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25279 epilogue_size_needed);
25280 else
25282 if (issetmem)
25283 expand_setmem_epilogue (dst, destreg, promoted_val,
25284 vec_promoted_val, count_exp,
25285 epilogue_size_needed);
25286 else
25287 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25288 epilogue_size_needed);
25291 if (jump_around_label)
25292 emit_label (jump_around_label);
25293 return true;
25297 /* Expand the appropriate insns for doing strlen if not just doing
25298 repnz; scasb
25300 out = result, initialized with the start address
25301 align_rtx = alignment of the address.
25302 scratch = scratch register, initialized with the start address when
25303 not aligned, otherwise undefined
25305 This is just the body. It needs the initializations mentioned above and
25306 some address computation at the end. These things are done in i386.md. */
25308 static void
25309 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25311 int align;
25312 rtx tmp;
25313 rtx_code_label *align_2_label = NULL;
25314 rtx_code_label *align_3_label = NULL;
25315 rtx_code_label *align_4_label = gen_label_rtx ();
25316 rtx_code_label *end_0_label = gen_label_rtx ();
25317 rtx mem;
25318 rtx tmpreg = gen_reg_rtx (SImode);
25319 rtx scratch = gen_reg_rtx (SImode);
25320 rtx cmp;
25322 align = 0;
25323 if (CONST_INT_P (align_rtx))
25324 align = INTVAL (align_rtx);
25326 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25328 /* Is there a known alignment and is it less than 4? */
25329 if (align < 4)
25331 rtx scratch1 = gen_reg_rtx (Pmode);
25332 emit_move_insn (scratch1, out);
25333 /* Is there a known alignment and is it not 2? */
25334 if (align != 2)
25336 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25337 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25339 /* Leave just the 3 lower bits. */
25340 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25341 NULL_RTX, 0, OPTAB_WIDEN);
25343 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25344 Pmode, 1, align_4_label);
25345 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25346 Pmode, 1, align_2_label);
25347 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25348 Pmode, 1, align_3_label);
25350 else
25352 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25353 check whether it is aligned to a 4-byte boundary. */
25355 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25356 NULL_RTX, 0, OPTAB_WIDEN);
25358 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25359 Pmode, 1, align_4_label);
25362 mem = change_address (src, QImode, out);
25364 /* Now compare the bytes. */
25366 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25367 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25368 QImode, 1, end_0_label);
25370 /* Increment the address. */
25371 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25373 /* Not needed with an alignment of 2 */
25374 if (align != 2)
25376 emit_label (align_2_label);
25378 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25379 end_0_label);
25381 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25383 emit_label (align_3_label);
25386 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25387 end_0_label);
25389 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25392 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25393 align this loop; it only makes the program larger and does not help
25394 to speed it up. */
25395 emit_label (align_4_label);
25397 mem = change_address (src, SImode, out);
25398 emit_move_insn (scratch, mem);
25399 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25401 /* This formula yields a nonzero result iff one of the bytes is zero.
25402 This saves three branches inside the loop and many cycles. */
25404 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25405 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25406 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25407 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25408 gen_int_mode (0x80808080, SImode)));
25409 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25410 align_4_label);
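/* The emitted test is the classic zero-byte trick, sketched here on plain
   integers for illustration:

       (x - 0x01010101) & ~x & 0x80808080   is nonzero iff some byte of x is 0.

   For example, x = 0x11002233 gives 0x0FFF2132 & 0xEEFFDDCC & 0x80808080
   == 0x00800000, flagging the zero byte.  */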
25412 if (TARGET_CMOVE)
25414 rtx reg = gen_reg_rtx (SImode);
25415 rtx reg2 = gen_reg_rtx (Pmode);
25416 emit_move_insn (reg, tmpreg);
25417 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25419 /* If zero is not in the first two bytes, move two bytes forward. */
25420 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25421 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25422 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25423 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25424 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25425 reg,
25426 tmpreg)));
25427 /* Emit lea manually to avoid clobbering of flags. */
25428 emit_insn (gen_rtx_SET (SImode, reg2,
25429 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25431 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25432 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25433 emit_insn (gen_rtx_SET (VOIDmode, out,
25434 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25435 reg2,
25436 out)));
25438 else
25440 rtx_code_label *end_2_label = gen_label_rtx ();
25441 /* Is zero in the first two bytes? */
25443 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25444 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25445 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25446 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25447 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25448 pc_rtx);
25449 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25450 JUMP_LABEL (tmp) = end_2_label;
25452 /* Not in the first two. Move two bytes forward. */
25453 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25454 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25456 emit_label (end_2_label);
25460 /* Avoid a branch while adjusting OUT for the exact position of the zero byte. */
25461 tmpreg = gen_lowpart (QImode, tmpreg);
25462 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25463 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25464 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25465 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25467 emit_label (end_0_label);
25470 /* Expand strlen. */
25472 bool
25473 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25475 rtx addr, scratch1, scratch2, scratch3, scratch4;
25477 /* The generic case of the strlen expander is long. Avoid expanding it
25478 unless TARGET_INLINE_ALL_STRINGOPS. */
25480 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25481 && !TARGET_INLINE_ALL_STRINGOPS
25482 && !optimize_insn_for_size_p ()
25483 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25484 return false;
25486 addr = force_reg (Pmode, XEXP (src, 0));
25487 scratch1 = gen_reg_rtx (Pmode);
25489 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25490 && !optimize_insn_for_size_p ())
25492 /* Well, it seems that some optimizer does not combine a call like
25493 foo(strlen(bar), strlen(bar));
25494 when the move and the subtraction are done here. It does calculate
25495 the length just once when these instructions are done inside
25496 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
25497 often used and I use one fewer register for the lifetime of
25498 output_strlen_unroll(), this is better. */
25500 emit_move_insn (out, addr);
25502 ix86_expand_strlensi_unroll_1 (out, src, align);
25504 /* strlensi_unroll_1 returns the address of the zero at the end of
25505 the string, like memchr(), so compute the length by subtracting
25506 the start address. */
25507 emit_insn (ix86_gen_sub3 (out, out, addr));
25509 else
25511 rtx unspec;
25513 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25514 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25515 return false;
25517 scratch2 = gen_reg_rtx (Pmode);
25518 scratch3 = gen_reg_rtx (Pmode);
25519 scratch4 = force_reg (Pmode, constm1_rtx);
25521 emit_move_insn (scratch3, addr);
25522 eoschar = force_reg (QImode, eoschar);
25524 src = replace_equiv_address_nv (src, scratch3);
25526 /* If .md starts supporting :P, this can be done in .md. */
25527 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25528 scratch4), UNSPEC_SCAS);
25529 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25530 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25531 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25533 return true;
25536 /* For a given symbol (function), construct code to compute the address of its
25537 PLT entry in the large x86-64 PIC model. */
25538 static rtx
25539 construct_plt_address (rtx symbol)
25541 rtx tmp, unspec;
25543 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25544 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25545 gcc_assert (Pmode == DImode);
25547 tmp = gen_reg_rtx (Pmode);
25548 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25550 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25551 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25552 return tmp;
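/* An illustrative sketch (not literal compiler output) of the code shape
   this produces, assuming the PIC register already holds the GOT base:

       movabs $symbol@PLTOFF, %tmp
       add    %pic_reg, %tmp

   %tmp then holds the address of SYMBOL's PLT entry and can be used as an
   indirect call target.  */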
25556 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25557 rtx callarg2,
25558 rtx pop, bool sibcall)
25560 rtx vec[3];
25561 rtx use = NULL, call;
25562 unsigned int vec_len = 0;
25564 if (pop == const0_rtx)
25565 pop = NULL;
25566 gcc_assert (!TARGET_64BIT || !pop);
25568 if (TARGET_MACHO && !TARGET_64BIT)
25570 #if TARGET_MACHO
25571 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25572 fnaddr = machopic_indirect_call_target (fnaddr);
25573 #endif
25575 else
25577 /* Static functions and indirect calls don't need the pic register. */
25578 if (flag_pic
25579 && (!TARGET_64BIT
25580 || (ix86_cmodel == CM_LARGE_PIC
25581 && DEFAULT_ABI != MS_ABI))
25582 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25583 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25585 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25586 if (ix86_use_pseudo_pic_reg ())
25587 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25588 pic_offset_table_rtx);
25592 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25593 parameters passed in vector registers. */
25594 if (TARGET_64BIT
25595 && (INTVAL (callarg2) > 0
25596 || (INTVAL (callarg2) == 0
25597 && (TARGET_SSE || !flag_skip_rax_setup))))
25599 rtx al = gen_rtx_REG (QImode, AX_REG);
25600 emit_move_insn (al, callarg2);
25601 use_reg (&use, al);
25604 if (ix86_cmodel == CM_LARGE_PIC
25605 && !TARGET_PECOFF
25606 && MEM_P (fnaddr)
25607 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25608 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25609 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25610 else if (sibcall
25611 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25612 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25614 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25615 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25618 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25620 if (retval)
25622 /* We should add the bound registers as destinations in case
25623 a pointer with bounds may be returned. */
25624 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25626 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25627 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25628 if (GET_CODE (retval) == PARALLEL)
25630 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25631 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25632 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25633 retval = chkp_join_splitted_slot (retval, par);
25635 else
25637 retval = gen_rtx_PARALLEL (VOIDmode,
25638 gen_rtvec (3, retval, b0, b1));
25639 chkp_put_regs_to_expr_list (retval);
25643 call = gen_rtx_SET (VOIDmode, retval, call);
25645 vec[vec_len++] = call;
25647 if (pop)
25649 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25650 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25651 vec[vec_len++] = pop;
25654 if (TARGET_64BIT_MS_ABI
25655 && (!callarg2 || INTVAL (callarg2) != -2))
25657 int const cregs_size
25658 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25659 int i;
25661 for (i = 0; i < cregs_size; i++)
25663 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25664 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25666 clobber_reg (&use, gen_rtx_REG (mode, regno));
25670 if (vec_len > 1)
25671 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25672 call = emit_call_insn (call);
25673 if (use)
25674 CALL_INSN_FUNCTION_USAGE (call) = use;
25676 return call;
25679 /* Output the assembly for a call instruction. */
25681 const char *
25682 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25684 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25685 bool seh_nop_p = false;
25686 const char *xasm;
25688 if (SIBLING_CALL_P (insn))
25690 if (direct_p)
25691 xasm = "%!jmp\t%P0";
25692 /* SEH epilogue detection requires the indirect branch case
25693 to include REX.W. */
25694 else if (TARGET_SEH)
25695 xasm = "%!rex.W jmp %A0";
25696 else
25697 xasm = "%!jmp\t%A0";
25699 output_asm_insn (xasm, &call_op);
25700 return "";
25703 /* SEH unwinding can require an extra nop to be emitted in several
25704 circumstances. Determine if we have one of those. */
25705 if (TARGET_SEH)
25707 rtx_insn *i;
25709 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25711 /* If we get to another real insn, we don't need the nop. */
25712 if (INSN_P (i))
25713 break;
25715 /* If we get to the epilogue note, prevent a catch region from
25716 being adjacent to the standard epilogue sequence. If non-
25717 call-exceptions, we'll have done this during epilogue emission. */
25718 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25719 && !flag_non_call_exceptions
25720 && !can_throw_internal (insn))
25722 seh_nop_p = true;
25723 break;
25727 /* If we didn't find a real insn following the call, prevent the
25728 unwinder from looking into the next function. */
25729 if (i == NULL)
25730 seh_nop_p = true;
25733 if (direct_p)
25734 xasm = "%!call\t%P0";
25735 else
25736 xasm = "%!call\t%A0";
25738 output_asm_insn (xasm, &call_op);
25740 if (seh_nop_p)
25741 return "nop";
25743 return "";
25746 /* Clear stack slot assignments remembered from previous functions.
25747 This is called from INIT_EXPANDERS once before RTL is emitted for each
25748 function. */
25750 static struct machine_function *
25751 ix86_init_machine_status (void)
25753 struct machine_function *f;
25755 f = ggc_cleared_alloc<machine_function> ();
25756 f->use_fast_prologue_epilogue_nregs = -1;
25757 f->call_abi = ix86_abi;
25759 return f;
25762 /* Return a MEM corresponding to a stack slot with mode MODE.
25763 Allocate a new slot if necessary.
25765 The RTL for a function can have several slots available: N is
25766 which slot to use. */
25769 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25771 struct stack_local_entry *s;
25773 gcc_assert (n < MAX_386_STACK_LOCALS);
25775 for (s = ix86_stack_locals; s; s = s->next)
25776 if (s->mode == mode && s->n == n)
25777 return validize_mem (copy_rtx (s->rtl));
25779 s = ggc_alloc<stack_local_entry> ();
25780 s->n = n;
25781 s->mode = mode;
25782 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25784 s->next = ix86_stack_locals;
25785 ix86_stack_locals = s;
25786 return validize_mem (copy_rtx (s->rtl));
25789 static void
25790 ix86_instantiate_decls (void)
25792 struct stack_local_entry *s;
25794 for (s = ix86_stack_locals; s; s = s->next)
25795 if (s->rtl != NULL_RTX)
25796 instantiate_decl_rtl (s->rtl);
25799 /* Check whether x86 address PARTS is a pc-relative address. */
25801 static bool
25802 rip_relative_addr_p (struct ix86_address *parts)
25804 rtx base, index, disp;
25806 base = parts->base;
25807 index = parts->index;
25808 disp = parts->disp;
25810 if (disp && !base && !index)
25812 if (TARGET_64BIT)
25814 rtx symbol = disp;
25816 if (GET_CODE (disp) == CONST)
25817 symbol = XEXP (disp, 0);
25818 if (GET_CODE (symbol) == PLUS
25819 && CONST_INT_P (XEXP (symbol, 1)))
25820 symbol = XEXP (symbol, 0);
25822 if (GET_CODE (symbol) == LABEL_REF
25823 || (GET_CODE (symbol) == SYMBOL_REF
25824 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25825 || (GET_CODE (symbol) == UNSPEC
25826 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25827 || XINT (symbol, 1) == UNSPEC_PCREL
25828 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25829 return true;
25832 return false;
25835 /* Calculate the length of the memory address in the instruction encoding.
25836 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25837 or other prefixes. We never generate addr32 prefix for LEA insn. */
25840 memory_address_length (rtx addr, bool lea)
25842 struct ix86_address parts;
25843 rtx base, index, disp;
25844 int len;
25845 int ok;
25847 if (GET_CODE (addr) == PRE_DEC
25848 || GET_CODE (addr) == POST_INC
25849 || GET_CODE (addr) == PRE_MODIFY
25850 || GET_CODE (addr) == POST_MODIFY)
25851 return 0;
25853 ok = ix86_decompose_address (addr, &parts);
25854 gcc_assert (ok);
25856 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25858 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25859 if (TARGET_64BIT && !lea
25860 && (SImode_address_operand (addr, VOIDmode)
25861 || (parts.base && GET_MODE (parts.base) == SImode)
25862 || (parts.index && GET_MODE (parts.index) == SImode)))
25863 len++;
25865 base = parts.base;
25866 index = parts.index;
25867 disp = parts.disp;
25869 if (base && GET_CODE (base) == SUBREG)
25870 base = SUBREG_REG (base);
25871 if (index && GET_CODE (index) == SUBREG)
25872 index = SUBREG_REG (index);
25874 gcc_assert (base == NULL_RTX || REG_P (base));
25875 gcc_assert (index == NULL_RTX || REG_P (index));
25877 /* Rule of thumb:
25878 - esp as the base always wants an index,
25879 - ebp as the base always wants a displacement,
25880 - r12 as the base always wants an index,
25881 - r13 as the base always wants a displacement. */
25883 /* Register Indirect. */
25884 if (base && !index && !disp)
25886 /* esp (for its index) and ebp (for its displacement) need
25887 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25888 code. */
25889 if (base == arg_pointer_rtx
25890 || base == frame_pointer_rtx
25891 || REGNO (base) == SP_REG
25892 || REGNO (base) == BP_REG
25893 || REGNO (base) == R12_REG
25894 || REGNO (base) == R13_REG)
25895 len++;
25898 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25899 is not disp32, but disp32(%rip), so for a plain disp32 a
25900 SIB byte is needed, unless print_operand_address
25901 optimizes it into disp32(%rip) or (%rip) is implied
25902 by the UNSPEC. */
25903 else if (disp && !base && !index)
25905 len += 4;
25906 if (rip_relative_addr_p (&parts))
25907 len++;
25909 else
25911 /* Find the length of the displacement constant. */
25912 if (disp)
25914 if (base && satisfies_constraint_K (disp))
25915 len += 1;
25916 else
25917 len += 4;
25919 /* ebp always wants a displacement. Similarly r13. */
25920 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25921 len++;
25923 /* An index requires the two-byte modrm form.... */
25924 if (index
25925 /* ...like esp (or r12), which always wants an index. */
25926 || base == arg_pointer_rtx
25927 || base == frame_pointer_rtx
25928 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25929 len++;
25932 return len;
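/* Illustrative sketch (not compiler code) of the rule of thumb above for
   plain register-indirect operands, in terms of hardware encoding numbers:
   SP/R12 need a SIB byte and BP/R13 need an explicit zero displacement.
   The function name is made up.  */
#if 0
static int
reg_indirect_needs_extra_byte (int hw_regno)
{
  return (hw_regno == 4      /* esp/rsp: SIB byte required */
          || hw_regno == 5   /* ebp/rbp: disp8 of 0 required */
          || hw_regno == 12  /* r12: SIB byte required */
          || hw_regno == 13  /* r13: disp8 of 0 required */);
}
#endif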
25935 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25936 is set, expect that the insn has an 8-bit immediate alternative. */
25938 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25940 int len = 0;
25941 int i;
25942 extract_insn_cached (insn);
25943 for (i = recog_data.n_operands - 1; i >= 0; --i)
25944 if (CONSTANT_P (recog_data.operand[i]))
25946 enum attr_mode mode = get_attr_mode (insn);
25948 gcc_assert (!len);
25949 if (shortform && CONST_INT_P (recog_data.operand[i]))
25951 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25952 switch (mode)
25954 case MODE_QI:
25955 len = 1;
25956 continue;
25957 case MODE_HI:
25958 ival = trunc_int_for_mode (ival, HImode);
25959 break;
25960 case MODE_SI:
25961 ival = trunc_int_for_mode (ival, SImode);
25962 break;
25963 default:
25964 break;
25966 if (IN_RANGE (ival, -128, 127))
25968 len = 1;
25969 continue;
25972 switch (mode)
25974 case MODE_QI:
25975 len = 1;
25976 break;
25977 case MODE_HI:
25978 len = 2;
25979 break;
25980 case MODE_SI:
25981 len = 4;
25982 break;
25983 /* Immediates for DImode instructions are encoded
25984 as 32-bit sign-extended values. */
25985 case MODE_DI:
25986 len = 4;
25987 break;
25988 default:
25989 fatal_insn ("unknown insn mode", insn);
25992 return len;
25995 /* Compute default value for "length_address" attribute. */
25997 ix86_attr_length_address_default (rtx_insn *insn)
25999 int i;
26001 if (get_attr_type (insn) == TYPE_LEA)
26003 rtx set = PATTERN (insn), addr;
26005 if (GET_CODE (set) == PARALLEL)
26006 set = XVECEXP (set, 0, 0);
26008 gcc_assert (GET_CODE (set) == SET);
26010 addr = SET_SRC (set);
26012 return memory_address_length (addr, true);
26015 extract_insn_cached (insn);
26016 for (i = recog_data.n_operands - 1; i >= 0; --i)
26017 if (MEM_P (recog_data.operand[i]))
26019 constrain_operands_cached (insn, reload_completed);
26020 if (which_alternative != -1)
26022 const char *constraints = recog_data.constraints[i];
26023 int alt = which_alternative;
26025 while (*constraints == '=' || *constraints == '+')
26026 constraints++;
26027 while (alt-- > 0)
26028 while (*constraints++ != ',')
26030 /* Skip ignored operands. */
26031 if (*constraints == 'X')
26032 continue;
26034 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26036 return 0;
26039 /* Compute default value for "length_vex" attribute. It includes
26040 the 2- or 3-byte VEX prefix and 1 opcode byte. */
26043 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26044 bool has_vex_w)
26046 int i;
26048 /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX.W bit
26049 requires the 3-byte VEX prefix. */
26050 if (!has_0f_opcode || has_vex_w)
26051 return 3 + 1;
26053 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26054 if (!TARGET_64BIT)
26055 return 2 + 1;
26057 extract_insn_cached (insn);
26059 for (i = recog_data.n_operands - 1; i >= 0; --i)
26060 if (REG_P (recog_data.operand[i]))
26062 /* REX.W bit uses 3 byte VEX prefix. */
26063 if (GET_MODE (recog_data.operand[i]) == DImode
26064 && GENERAL_REG_P (recog_data.operand[i]))
26065 return 3 + 1;
26067 else
26069 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26070 if (MEM_P (recog_data.operand[i])
26071 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26072 return 3 + 1;
26075 return 2 + 1;
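/* Illustrative summary of the rule implemented above (not compiler code):
   the 2-byte VEX form (0xC5) implies the 0f opcode map, W = 0 and X = B = 1,
   so any other case needs the 3-byte form (0xC4).  The name and parameters
   are made up for the sketch.  */
#if 0
static int
vex_prefix_length (int uses_0f_map, int vex_w, int needs_rex_x_or_b)
{
  return (uses_0f_map && !vex_w && !needs_rex_x_or_b) ? 2 : 3;
}
#endif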
26078 /* Return the maximum number of instructions a cpu can issue. */
26080 static int
26081 ix86_issue_rate (void)
26083 switch (ix86_tune)
26085 case PROCESSOR_PENTIUM:
26086 case PROCESSOR_BONNELL:
26087 case PROCESSOR_SILVERMONT:
26088 case PROCESSOR_KNL:
26089 case PROCESSOR_INTEL:
26090 case PROCESSOR_K6:
26091 case PROCESSOR_BTVER2:
26092 case PROCESSOR_PENTIUM4:
26093 case PROCESSOR_NOCONA:
26094 return 2;
26096 case PROCESSOR_PENTIUMPRO:
26097 case PROCESSOR_ATHLON:
26098 case PROCESSOR_K8:
26099 case PROCESSOR_AMDFAM10:
26100 case PROCESSOR_GENERIC:
26101 case PROCESSOR_BTVER1:
26102 return 3;
26104 case PROCESSOR_BDVER1:
26105 case PROCESSOR_BDVER2:
26106 case PROCESSOR_BDVER3:
26107 case PROCESSOR_BDVER4:
26108 case PROCESSOR_CORE2:
26109 case PROCESSOR_NEHALEM:
26110 case PROCESSOR_SANDYBRIDGE:
26111 case PROCESSOR_HASWELL:
26112 return 4;
26114 default:
26115 return 1;
26119 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
26120 by DEP_INSN and nothing else set by DEP_INSN. */
26122 static bool
26123 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26125 rtx set, set2;
26127 /* Simplify the test for uninteresting insns. */
26128 if (insn_type != TYPE_SETCC
26129 && insn_type != TYPE_ICMOV
26130 && insn_type != TYPE_FCMOV
26131 && insn_type != TYPE_IBR)
26132 return false;
26134 if ((set = single_set (dep_insn)) != 0)
26136 set = SET_DEST (set);
26137 set2 = NULL_RTX;
26139 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26140 && XVECLEN (PATTERN (dep_insn), 0) == 2
26141 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26142 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26144 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26145 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26147 else
26148 return false;
26150 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26151 return false;
26153 /* This test is true if the dependent insn reads the flags but
26154 not any other potentially set register. */
26155 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26156 return false;
26158 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26159 return false;
26161 return true;
26164 /* Return true iff USE_INSN has a memory address with operands set by
26165 SET_INSN. */
26167 bool
26168 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26170 int i;
26171 extract_insn_cached (use_insn);
26172 for (i = recog_data.n_operands - 1; i >= 0; --i)
26173 if (MEM_P (recog_data.operand[i]))
26175 rtx addr = XEXP (recog_data.operand[i], 0);
26176 return modified_in_p (addr, set_insn) != 0;
26178 return false;
26181 /* Helper function for exact_store_load_dependency.
26182 Return true if addr is found in insn. */
26183 static bool
26184 exact_dependency_1 (rtx addr, rtx insn)
26186 enum rtx_code code;
26187 const char *format_ptr;
26188 int i, j;
26190 code = GET_CODE (insn);
26191 switch (code)
26193 case MEM:
26194 if (rtx_equal_p (addr, insn))
26195 return true;
26196 break;
26197 case REG:
26198 CASE_CONST_ANY:
26199 case SYMBOL_REF:
26200 case CODE_LABEL:
26201 case PC:
26202 case CC0:
26203 case EXPR_LIST:
26204 return false;
26205 default:
26206 break;
26209 format_ptr = GET_RTX_FORMAT (code);
26210 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26212 switch (*format_ptr++)
26214 case 'e':
26215 if (exact_dependency_1 (addr, XEXP (insn, i)))
26216 return true;
26217 break;
26218 case 'E':
26219 for (j = 0; j < XVECLEN (insn, i); j++)
26220 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26221 return true;
26222 break;
26225 return false;
26228 /* Return true if there exists an exact dependency between the store and load,
26229 i.e. the same memory address is used in both. */
26230 static bool
26231 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26233 rtx set1, set2;
26235 set1 = single_set (store);
26236 if (!set1)
26237 return false;
26238 if (!MEM_P (SET_DEST (set1)))
26239 return false;
26240 set2 = single_set (load);
26241 if (!set2)
26242 return false;
26243 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26244 return true;
26245 return false;
26248 static int
26249 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26251 enum attr_type insn_type, dep_insn_type;
26252 enum attr_memory memory;
26253 rtx set, set2;
26254 int dep_insn_code_number;
26256 /* Anti and output dependencies have zero cost on all CPUs. */
26257 if (REG_NOTE_KIND (link) != 0)
26258 return 0;
26260 dep_insn_code_number = recog_memoized (dep_insn);
26262 /* If we can't recognize the insns, we can't really do anything. */
26263 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26264 return cost;
26266 insn_type = get_attr_type (insn);
26267 dep_insn_type = get_attr_type (dep_insn);
26269 switch (ix86_tune)
26271 case PROCESSOR_PENTIUM:
26272 /* Address Generation Interlock adds a cycle of latency. */
26273 if (insn_type == TYPE_LEA)
26275 rtx addr = PATTERN (insn);
26277 if (GET_CODE (addr) == PARALLEL)
26278 addr = XVECEXP (addr, 0, 0);
26280 gcc_assert (GET_CODE (addr) == SET);
26282 addr = SET_SRC (addr);
26283 if (modified_in_p (addr, dep_insn))
26284 cost += 1;
26286 else if (ix86_agi_dependent (dep_insn, insn))
26287 cost += 1;
26289 /* ??? Compares pair with jump/setcc. */
26290 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26291 cost = 0;
26293 /* Floating point stores require value to be ready one cycle earlier. */
26294 if (insn_type == TYPE_FMOV
26295 && get_attr_memory (insn) == MEMORY_STORE
26296 && !ix86_agi_dependent (dep_insn, insn))
26297 cost += 1;
26298 break;
26300 case PROCESSOR_PENTIUMPRO:
26301 /* INT->FP conversion is expensive. */
26302 if (get_attr_fp_int_src (dep_insn))
26303 cost += 5;
26305 /* There is one cycle extra latency between an FP op and a store. */
26306 if (insn_type == TYPE_FMOV
26307 && (set = single_set (dep_insn)) != NULL_RTX
26308 && (set2 = single_set (insn)) != NULL_RTX
26309 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26310 && MEM_P (SET_DEST (set2)))
26311 cost += 1;
26313 memory = get_attr_memory (insn);
26315 /* Model the ability of the reorder buffer to hide the latency of a load
26316 by executing it in parallel with the previous instruction, when the
26317 previous instruction is not needed to compute the address. */
26318 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26319 && !ix86_agi_dependent (dep_insn, insn))
26321 /* Claim moves to take one cycle, as the core can issue one load
26322 at a time and the next load can start a cycle later. */
26323 if (dep_insn_type == TYPE_IMOV
26324 || dep_insn_type == TYPE_FMOV)
26325 cost = 1;
26326 else if (cost > 1)
26327 cost--;
26329 break;
26331 case PROCESSOR_K6:
26332 /* The esp dependency is resolved before
26333 the instruction is really finished. */
26334 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26335 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26336 return 1;
26338 /* INT->FP conversion is expensive. */
26339 if (get_attr_fp_int_src (dep_insn))
26340 cost += 5;
26342 memory = get_attr_memory (insn);
26344 /* Model the ability of the reorder buffer to hide the latency of a load
26345 by executing it in parallel with the previous instruction, when the
26346 previous instruction is not needed to compute the address. */
26347 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26348 && !ix86_agi_dependent (dep_insn, insn))
26350 /* Claim moves to take one cycle, as the core can issue one load
26351 at a time and the next load can start a cycle later. */
26352 if (dep_insn_type == TYPE_IMOV
26353 || dep_insn_type == TYPE_FMOV)
26354 cost = 1;
26355 else if (cost > 2)
26356 cost -= 2;
26357 else
26358 cost = 1;
26360 break;
26362 case PROCESSOR_AMDFAM10:
26363 case PROCESSOR_BDVER1:
26364 case PROCESSOR_BDVER2:
26365 case PROCESSOR_BDVER3:
26366 case PROCESSOR_BDVER4:
26367 case PROCESSOR_BTVER1:
26368 case PROCESSOR_BTVER2:
26369 case PROCESSOR_GENERIC:
26370 /* The stack engine allows push and pop instructions to execute in parallel. */
26371 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26372 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26373 return 0;
26374 /* FALLTHRU */
26376 case PROCESSOR_ATHLON:
26377 case PROCESSOR_K8:
26378 memory = get_attr_memory (insn);
26380 /* Model the ability of the reorder buffer to hide the latency of a load
26381 by executing it in parallel with the previous instruction, when the
26382 previous instruction is not needed to compute the address. */
26383 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26384 && !ix86_agi_dependent (dep_insn, insn))
26386 enum attr_unit unit = get_attr_unit (insn);
26387 int loadcost = 3;
26389 /* Because of the difference between the length of integer and
26390 floating unit pipeline preparation stages, the memory operands
26391 for floating point are cheaper.
26393 ??? For Athlon the difference is most probably 2. */
26394 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26395 loadcost = 3;
26396 else
26397 loadcost = TARGET_ATHLON ? 2 : 0;
26399 if (cost >= loadcost)
26400 cost -= loadcost;
26401 else
26402 cost = 0;
26404 break;
26406 case PROCESSOR_CORE2:
26407 case PROCESSOR_NEHALEM:
26408 case PROCESSOR_SANDYBRIDGE:
26409 case PROCESSOR_HASWELL:
26410 /* The stack engine allows push&pop instructions to execute in parallel. */
26411 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26412 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26413 return 0;
26415 memory = get_attr_memory (insn);
26417 /* Show the ability of the reorder buffer to hide the latency of a load
26418 by executing it in parallel with the previous instruction, in case the
26419 previous instruction is not needed to compute the address. */
26420 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26421 && !ix86_agi_dependent (dep_insn, insn))
26423 if (cost >= 4)
26424 cost -= 4;
26425 else
26426 cost = 0;
26428 break;
26430 case PROCESSOR_SILVERMONT:
26431 case PROCESSOR_KNL:
26432 case PROCESSOR_INTEL:
26433 if (!reload_completed)
26434 return cost;
26436 /* Increase cost of integer loads. */
26437 memory = get_attr_memory (dep_insn);
26438 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26440 enum attr_unit unit = get_attr_unit (dep_insn);
26441 if (unit == UNIT_INTEGER && cost == 1)
26443 if (memory == MEMORY_LOAD)
26444 cost = 3;
26445 else
26447 /* Increase the cost of ld/st for short int types only
26448 because of the store forwarding issue. */
26449 rtx set = single_set (dep_insn);
26450 if (set && (GET_MODE (SET_DEST (set)) == QImode
26451 || GET_MODE (SET_DEST (set)) == HImode))
26453 /* Increase the cost of the store/load insn if an exact
26454 dependence exists and it is a load insn. */
26455 enum attr_memory insn_memory = get_attr_memory (insn);
26456 if (insn_memory == MEMORY_LOAD
26457 && exact_store_load_dependency (dep_insn, insn))
26458 cost = 3;
26464 default:
26465 break;
26468 return cost;
26471 /* How many alternative schedules to try. This should be as wide as the
26472 scheduling freedom in the DFA, but no wider. Making this value too
26473 large results in extra work for the scheduler. */
26475 static int
26476 ia32_multipass_dfa_lookahead (void)
26478 switch (ix86_tune)
26480 case PROCESSOR_PENTIUM:
26481 return 2;
26483 case PROCESSOR_PENTIUMPRO:
26484 case PROCESSOR_K6:
26485 return 1;
26487 case PROCESSOR_BDVER1:
26488 case PROCESSOR_BDVER2:
26489 case PROCESSOR_BDVER3:
26490 case PROCESSOR_BDVER4:
26491 /* We use lookahead value 4 for BD both before and after reload
26492 schedules. Plan is to have value 8 included for O3. */
26493 return 4;
26495 case PROCESSOR_CORE2:
26496 case PROCESSOR_NEHALEM:
26497 case PROCESSOR_SANDYBRIDGE:
26498 case PROCESSOR_HASWELL:
26499 case PROCESSOR_BONNELL:
26500 case PROCESSOR_SILVERMONT:
26501 case PROCESSOR_KNL:
26502 case PROCESSOR_INTEL:
26503 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26504 as the number of instructions that can be executed in a cycle, i.e.,
26505 issue_rate. I wonder why tuning for many CPUs does not do this. */
26506 if (reload_completed)
26507 return ix86_issue_rate ();
26508 /* Don't use lookahead for pre-reload schedule to save compile time. */
26509 return 0;
26511 default:
26512 return 0;
26516 /* Return true if target platform supports macro-fusion. */
26518 static bool
26519 ix86_macro_fusion_p ()
26521 return TARGET_FUSE_CMP_AND_BRANCH;
26524 /* Check whether the current microarchitecture supports macro fusion
26525 for insn pair "CONDGEN + CONDJMP". Refer to
26526 "Intel Architectures Optimization Reference Manual". */
26528 static bool
26529 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26531 rtx src, dest;
26532 enum rtx_code ccode;
26533 rtx compare_set = NULL_RTX, test_if, cond;
26534 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26536 if (!any_condjump_p (condjmp))
26537 return false;
26539 if (get_attr_type (condgen) != TYPE_TEST
26540 && get_attr_type (condgen) != TYPE_ICMP
26541 && get_attr_type (condgen) != TYPE_INCDEC
26542 && get_attr_type (condgen) != TYPE_ALU)
26543 return false;
26545 compare_set = single_set (condgen);
26546 if (compare_set == NULL_RTX
26547 && !TARGET_FUSE_ALU_AND_BRANCH)
26548 return false;
26550 if (compare_set == NULL_RTX)
26552 int i;
26553 rtx pat = PATTERN (condgen);
26554 for (i = 0; i < XVECLEN (pat, 0); i++)
26555 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26557 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26558 if (GET_CODE (set_src) == COMPARE)
26559 compare_set = XVECEXP (pat, 0, i);
26560 else
26561 alu_set = XVECEXP (pat, 0, i);
26564 if (compare_set == NULL_RTX)
26565 return false;
26566 src = SET_SRC (compare_set);
26567 if (GET_CODE (src) != COMPARE)
26568 return false;
26570 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26571 supported. */
26572 if ((MEM_P (XEXP (src, 0))
26573 && CONST_INT_P (XEXP (src, 1)))
26574 || (MEM_P (XEXP (src, 1))
26575 && CONST_INT_P (XEXP (src, 0))))
26576 return false;
26578 /* No fusion for RIP-relative address. */
26579 if (MEM_P (XEXP (src, 0)))
26580 addr = XEXP (XEXP (src, 0), 0);
26581 else if (MEM_P (XEXP (src, 1)))
26582 addr = XEXP (XEXP (src, 1), 0);
26584 if (addr) {
26585 ix86_address parts;
26586 int ok = ix86_decompose_address (addr, &parts);
26587 gcc_assert (ok);
26589 if (rip_relative_addr_p (&parts))
26590 return false;
26593 test_if = SET_SRC (pc_set (condjmp));
26594 cond = XEXP (test_if, 0);
26595 ccode = GET_CODE (cond);
26596 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26597 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26598 && (ccode == GE
26599 || ccode == GT
26600 || ccode == LE
26601 || ccode == LT))
26602 return false;
26604 /* Return true for TYPE_TEST and TYPE_ICMP. */
26605 if (get_attr_type (condgen) == TYPE_TEST
26606 || get_attr_type (condgen) == TYPE_ICMP)
26607 return true;
26609 /* The following handles the case of macro-fusion for alu + jmp. */
26610 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26611 return false;
26613 /* No fusion for an alu op with a memory destination operand. */
26614 dest = SET_DEST (alu_set);
26615 if (MEM_P (dest))
26616 return false;
26618 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26619 supported. */
26620 if (get_attr_type (condgen) == TYPE_INCDEC
26621 && (ccode == GEU
26622 || ccode == GTU
26623 || ccode == LEU
26624 || ccode == LTU))
26625 return false;
26627 return true;
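/* A few illustrative pairs (a sketch only, derived from the checks above;
   whether a candidate actually fuses also depends on the TARGET_FUSE_*
   tuning flags):

     test %eax, %eax   + jne   candidate (TYPE_TEST)
     cmp  %esi, %edi   + jl    candidate only with
                               TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
                               (jl reads the Sign/Overflow flags)
     cmpl $0, (%rdi)   + jne   rejected (MEM-IMM compare)
     cmp  %eax, x(%rip)+ jne   rejected (RIP-relative address)
     incl %eax         + jae   rejected (inc/dec + unsigned jcc)
     addl %esi, %edi   + jne   candidate only with TARGET_FUSE_ALU_AND_BRANCH
                               and a register destination.  */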
26630 /* Try to reorder the ready list to take advantage of Atom's pipelined
26631 IMUL execution. It is applied if
26632 (1) an IMUL instruction is at the top of the list;
26633 (2) there is exactly one producer of an independent IMUL instruction
26634 in the ready list.
26635 Return the index of the IMUL producer if it was found and -1 otherwise. */
26636 static int
26637 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26639 rtx_insn *insn;
26640 rtx set, insn1, insn2;
26641 sd_iterator_def sd_it;
26642 dep_t dep;
26643 int index = -1;
26644 int i;
26646 if (!TARGET_BONNELL)
26647 return index;
26649 /* Check that an IMUL instruction is at the top of the ready list. */
26650 insn = ready[n_ready - 1];
26651 set = single_set (insn);
26652 if (!set)
26653 return index;
26654 if (!(GET_CODE (SET_SRC (set)) == MULT
26655 && GET_MODE (SET_SRC (set)) == SImode))
26656 return index;
26658 /* Search for the producer of an independent IMUL instruction. */
26659 for (i = n_ready - 2; i >= 0; i--)
26661 insn = ready[i];
26662 if (!NONDEBUG_INSN_P (insn))
26663 continue;
26664 /* Skip IMUL instruction. */
26665 insn2 = PATTERN (insn);
26666 if (GET_CODE (insn2) == PARALLEL)
26667 insn2 = XVECEXP (insn2, 0, 0);
26668 if (GET_CODE (insn2) == SET
26669 && GET_CODE (SET_SRC (insn2)) == MULT
26670 && GET_MODE (SET_SRC (insn2)) == SImode)
26671 continue;
26673 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26675 rtx con;
26676 con = DEP_CON (dep);
26677 if (!NONDEBUG_INSN_P (con))
26678 continue;
26679 insn1 = PATTERN (con);
26680 if (GET_CODE (insn1) == PARALLEL)
26681 insn1 = XVECEXP (insn1, 0, 0);
26683 if (GET_CODE (insn1) == SET
26684 && GET_CODE (SET_SRC (insn1)) == MULT
26685 && GET_MODE (SET_SRC (insn1)) == SImode)
26687 sd_iterator_def sd_it1;
26688 dep_t dep1;
26689 /* Check if there is no other dependee for IMUL. */
26690 index = i;
26691 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26693 rtx pro;
26694 pro = DEP_PRO (dep1);
26695 if (!NONDEBUG_INSN_P (pro))
26696 continue;
26697 if (pro != insn)
26698 index = -1;
26700 if (index >= 0)
26701 break;
26704 if (index >= 0)
26705 break;
26707 return index;
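/* Worked example (hypothetical ready list, top element last): with Bonnell
   tuning, suppose

     ready[] = { ..., P, ..., I1 }

   where I1 is an SImode multiply and P is the only producer feeding another
   SImode multiply I2 that is not yet ready.  The function returns the index
   of P; ix86_sched_reorder then moves P to the top of the list so that I2
   becomes ready right behind I1 and the two multiplies can overlap in the
   pipelined IMUL unit.  */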
26710 /* Try to find the best candidate at the top of the ready list if two insns
26711 have the same priority - the best candidate is the one whose dependees
26712 were scheduled earlier. Applied to Silvermont only.
26713 Return true if the top 2 insns must be interchanged. */
26714 static bool
26715 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26717 rtx_insn *top = ready[n_ready - 1];
26718 rtx_insn *next = ready[n_ready - 2];
26719 rtx set;
26720 sd_iterator_def sd_it;
26721 dep_t dep;
26722 int clock1 = -1;
26723 int clock2 = -1;
26724 #define INSN_TICK(INSN) (HID (INSN)->tick)
26726 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26727 return false;
26729 if (!NONDEBUG_INSN_P (top))
26730 return false;
26731 if (!NONJUMP_INSN_P (top))
26732 return false;
26733 if (!NONDEBUG_INSN_P (next))
26734 return false;
26735 if (!NONJUMP_INSN_P (next))
26736 return false;
26737 set = single_set (top);
26738 if (!set)
26739 return false;
26740 set = single_set (next);
26741 if (!set)
26742 return false;
26744 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26746 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26747 return false;
26748 /* Determine the winner more precisely. */
26749 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26751 rtx pro;
26752 pro = DEP_PRO (dep);
26753 if (!NONDEBUG_INSN_P (pro))
26754 continue;
26755 if (INSN_TICK (pro) > clock1)
26756 clock1 = INSN_TICK (pro);
26758 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26760 rtx pro;
26761 pro = DEP_PRO (dep);
26762 if (!NONDEBUG_INSN_P (pro))
26763 continue;
26764 if (INSN_TICK (pro) > clock2)
26765 clock2 = INSN_TICK (pro);
26768 if (clock1 == clock2)
26770 /* Determine the winner - a load must win. */
26771 enum attr_memory memory1, memory2;
26772 memory1 = get_attr_memory (top);
26773 memory2 = get_attr_memory (next);
26774 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26775 return true;
26777 return (bool) (clock2 < clock1);
26779 return false;
26780 #undef INSN_TICK
26783 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26784 Return the issue rate. */
26785 static int
26786 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26787 int *pn_ready, int clock_var)
26789 int issue_rate = -1;
26790 int n_ready = *pn_ready;
26791 int i;
26792 rtx_insn *insn;
26793 int index = -1;
26795 /* Set up issue rate. */
26796 issue_rate = ix86_issue_rate ();
26798 /* Do reordering for BONNELL/SILVERMONT only. */
26799 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26800 return issue_rate;
26802 /* Nothing to do if ready list contains only 1 instruction. */
26803 if (n_ready <= 1)
26804 return issue_rate;
26806 /* Do reordering for the post-reload scheduler only. */
26807 if (!reload_completed)
26808 return issue_rate;
26810 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26812 if (sched_verbose > 1)
26813 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26814 INSN_UID (ready[index]));
26816 /* Put IMUL producer (ready[index]) at the top of ready list. */
26817 insn = ready[index];
26818 for (i = index; i < n_ready - 1; i++)
26819 ready[i] = ready[i + 1];
26820 ready[n_ready - 1] = insn;
26821 return issue_rate;
26824 /* Skip selective scheduling since HID is not populated in it. */
26825 if (clock_var != 0
26826 && !sel_sched_p ()
26827 && swap_top_of_ready_list (ready, n_ready))
26829 if (sched_verbose > 1)
26830 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26831 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26832 /* Swap 2 top elements of ready list. */
26833 insn = ready[n_ready - 1];
26834 ready[n_ready - 1] = ready[n_ready - 2];
26835 ready[n_ready - 2] = insn;
26837 return issue_rate;
26840 static bool
26841 ix86_class_likely_spilled_p (reg_class_t);
26843 /* Return true if the lhs of insn is a HW function argument register and set
26844 is_spilled to true if it is a likely-spilled HW register. */
26845 static bool
26846 insn_is_function_arg (rtx insn, bool* is_spilled)
26848 rtx dst;
26850 if (!NONDEBUG_INSN_P (insn))
26851 return false;
26852 /* Call instructions are not movable, ignore them. */
26853 if (CALL_P (insn))
26854 return false;
26855 insn = PATTERN (insn);
26856 if (GET_CODE (insn) == PARALLEL)
26857 insn = XVECEXP (insn, 0, 0);
26858 if (GET_CODE (insn) != SET)
26859 return false;
26860 dst = SET_DEST (insn);
26861 if (REG_P (dst) && HARD_REGISTER_P (dst)
26862 && ix86_function_arg_regno_p (REGNO (dst)))
26864 /* Is it a likely-spilled HW register? */
26865 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26866 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26867 *is_spilled = true;
26868 return true;
26870 return false;
26873 /* Add output dependencies for a chain of adjacent function arguments, but
26874 only if there is a move to a likely-spilled HW register. Return the first
26875 argument if at least one dependence was added, or NULL otherwise. */
26876 static rtx_insn *
26877 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26879 rtx_insn *insn;
26880 rtx_insn *last = call;
26881 rtx_insn *first_arg = NULL;
26882 bool is_spilled = false;
26884 head = PREV_INSN (head);
26886 /* Find the argument-passing instruction nearest to the call. */
26887 while (true)
26889 last = PREV_INSN (last);
26890 if (last == head)
26891 return NULL;
26892 if (!NONDEBUG_INSN_P (last))
26893 continue;
26894 if (insn_is_function_arg (last, &is_spilled))
26895 break;
26896 return NULL;
26899 first_arg = last;
26900 while (true)
26902 insn = PREV_INSN (last);
26903 if (!INSN_P (insn))
26904 break;
26905 if (insn == head)
26906 break;
26907 if (!NONDEBUG_INSN_P (insn))
26909 last = insn;
26910 continue;
26912 if (insn_is_function_arg (insn, &is_spilled))
26914 /* Add an output dependence between two function arguments if the chain
26915 of output arguments contains likely-spilled HW registers. */
26916 if (is_spilled)
26917 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26918 first_arg = last = insn;
26920 else
26921 break;
26923 if (!is_spilled)
26924 return NULL;
26925 return first_arg;
26928 /* Add output or anti dependency from insn to first_arg to restrict its code
26929 motion. */
26930 static void
26931 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26933 rtx set;
26934 rtx tmp;
26936 /* Add anti dependencies for bounds stores. */
26937 if (INSN_P (insn)
26938 && GET_CODE (PATTERN (insn)) == PARALLEL
26939 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26940 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26942 add_dependence (first_arg, insn, REG_DEP_ANTI);
26943 return;
26946 set = single_set (insn);
26947 if (!set)
26948 return;
26949 tmp = SET_DEST (set);
26950 if (REG_P (tmp))
26952 /* Add output dependency to the first function argument. */
26953 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26954 return;
26956 /* Add anti dependency. */
26957 add_dependence (first_arg, insn, REG_DEP_ANTI);
26960 /* Avoid cross-block motion of a function argument by adding a dependency
26961 from the first non-jump instruction in bb. */
26962 static void
26963 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26965 rtx_insn *insn = BB_END (bb);
26967 while (insn)
26969 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26971 rtx set = single_set (insn);
26972 if (set)
26974 avoid_func_arg_motion (arg, insn);
26975 return;
26978 if (insn == BB_HEAD (bb))
26979 return;
26980 insn = PREV_INSN (insn);
26984 /* Hook for pre-reload schedule - avoid motion of function arguments
26985 passed in likely spilled HW registers. */
26986 static void
26987 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26989 rtx_insn *insn;
26990 rtx_insn *first_arg = NULL;
26991 if (reload_completed)
26992 return;
26993 while (head != tail && DEBUG_INSN_P (head))
26994 head = NEXT_INSN (head);
26995 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26996 if (INSN_P (insn) && CALL_P (insn))
26998 first_arg = add_parameter_dependencies (insn, head);
26999 if (first_arg)
27001 /* Add a dependee for the first argument to predecessors, but only
27002 if the region contains more than one block. */
27003 basic_block bb = BLOCK_FOR_INSN (insn);
27004 int rgn = CONTAINING_RGN (bb->index);
27005 int nr_blks = RGN_NR_BLOCKS (rgn);
27006 /* Skip trivial regions and region head blocks that can have
27007 predecessors outside of the region. */
27008 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27010 edge e;
27011 edge_iterator ei;
27013 /* Regions are SCCs with the exception of selective
27014 scheduling with pipelining of outer blocks enabled.
27015 So also check that immediate predecessors of a non-head
27016 block are in the same region. */
27017 FOR_EACH_EDGE (e, ei, bb->preds)
27019 /* Avoid creating loop-carried dependencies by using the
27020 topological ordering in the region. */
27021 if (rgn == CONTAINING_RGN (e->src->index)
27022 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27023 add_dependee_for_func_arg (first_arg, e->src);
27026 insn = first_arg;
27027 if (insn == head)
27028 break;
27031 else if (first_arg)
27032 avoid_func_arg_motion (first_arg, insn);
27035 /* Hook for pre-reload schedule - set the priority of moves from likely
27036 spilled HW registers to the maximum, to schedule them as soon as possible.
27037 These are moves from function argument registers at the top of the function
27038 entry and moves from function return value registers after a call. */
27039 static int
27040 ix86_adjust_priority (rtx_insn *insn, int priority)
27042 rtx set;
27044 if (reload_completed)
27045 return priority;
27047 if (!NONDEBUG_INSN_P (insn))
27048 return priority;
27050 set = single_set (insn);
27051 if (set)
27053 rtx tmp = SET_SRC (set);
27054 if (REG_P (tmp)
27055 && HARD_REGISTER_P (tmp)
27056 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27057 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27058 return current_sched_info->sched_max_insns_priority;
27061 return priority;
27064 /* Model decoder of Core 2/i7.
27065 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
27066 track the instruction fetch block boundaries and make sure that long
27067 (9+ bytes) instructions are assigned to D0. */
27069 /* Maximum length of an insn that can be handled by
27070 a secondary decoder unit. '8' for Core 2/i7. */
27071 static int core2i7_secondary_decoder_max_insn_size;
27073 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27074 '16' for Core 2/i7. */
27075 static int core2i7_ifetch_block_size;
27077 /* Maximum number of instructions decoder can handle per cycle.
27078 '6' for Core 2/i7. */
27079 static int core2i7_ifetch_block_max_insns;
27081 typedef struct ix86_first_cycle_multipass_data_ *
27082 ix86_first_cycle_multipass_data_t;
27083 typedef const struct ix86_first_cycle_multipass_data_ *
27084 const_ix86_first_cycle_multipass_data_t;
27086 /* A variable to store target state across calls to max_issue within
27087 one cycle. */
27088 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27089 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27091 /* Initialize DATA. */
27092 static void
27093 core2i7_first_cycle_multipass_init (void *_data)
27095 ix86_first_cycle_multipass_data_t data
27096 = (ix86_first_cycle_multipass_data_t) _data;
27098 data->ifetch_block_len = 0;
27099 data->ifetch_block_n_insns = 0;
27100 data->ready_try_change = NULL;
27101 data->ready_try_change_size = 0;
27104 /* Advancing the cycle; reset ifetch block counts. */
27105 static void
27106 core2i7_dfa_post_advance_cycle (void)
27108 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27110 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27112 data->ifetch_block_len = 0;
27113 data->ifetch_block_n_insns = 0;
27116 static int min_insn_size (rtx_insn *);
27118 /* Filter out insns from ready_try that the core will not be able to issue
27119 on the current cycle due to the decoder. */
27120 static void
27121 core2i7_first_cycle_multipass_filter_ready_try
27122 (const_ix86_first_cycle_multipass_data_t data,
27123 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27125 while (n_ready--)
27127 rtx_insn *insn;
27128 int insn_size;
27130 if (ready_try[n_ready])
27131 continue;
27133 insn = get_ready_element (n_ready);
27134 insn_size = min_insn_size (insn);
27136 if (/* If this is too long an insn for a secondary decoder ... */
27137 (!first_cycle_insn_p
27138 && insn_size > core2i7_secondary_decoder_max_insn_size)
27139 /* ... or it would not fit into the ifetch block ... */
27140 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27141 /* ... or the decoder is full already ... */
27142 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27143 /* ... mask the insn out. */
27145 ready_try[n_ready] = 1;
27147 if (data->ready_try_change)
27148 bitmap_set_bit (data->ready_try_change, n_ready);
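/* Worked example (illustrative numbers only): with the Core 2/i7 parameters
   set up in ix86_sched_init_global (secondary decoder limit 8 bytes, 16-byte
   ifetch block, at most 6 insns per cycle), suppose 3 insns totalling 13
   bytes have already been accounted for on this cycle.  A 5-byte candidate
   is masked out (13 + 5 > 16), a 3-byte candidate is still allowed, and a
   9-byte insn is only ever allowed as the first insn of a cycle.  */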
27153 /* Prepare for a new round of multipass lookahead scheduling. */
27154 static void
27155 core2i7_first_cycle_multipass_begin (void *_data,
27156 signed char *ready_try, int n_ready,
27157 bool first_cycle_insn_p)
27159 ix86_first_cycle_multipass_data_t data
27160 = (ix86_first_cycle_multipass_data_t) _data;
27161 const_ix86_first_cycle_multipass_data_t prev_data
27162 = ix86_first_cycle_multipass_data;
27164 /* Restore the state from the end of the previous round. */
27165 data->ifetch_block_len = prev_data->ifetch_block_len;
27166 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27168 /* Filter instructions that cannot be issued on the current cycle due to
27169 decoder restrictions. */
27170 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27171 first_cycle_insn_p);
27174 /* INSN is being issued in the current solution. Account for its impact on
27175 the decoder model. */
27176 static void
27177 core2i7_first_cycle_multipass_issue (void *_data,
27178 signed char *ready_try, int n_ready,
27179 rtx_insn *insn, const void *_prev_data)
27181 ix86_first_cycle_multipass_data_t data
27182 = (ix86_first_cycle_multipass_data_t) _data;
27183 const_ix86_first_cycle_multipass_data_t prev_data
27184 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27186 int insn_size = min_insn_size (insn);
27188 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27189 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27190 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27191 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27193 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27194 if (!data->ready_try_change)
27196 data->ready_try_change = sbitmap_alloc (n_ready);
27197 data->ready_try_change_size = n_ready;
27199 else if (data->ready_try_change_size < n_ready)
27201 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27202 n_ready, 0);
27203 data->ready_try_change_size = n_ready;
27205 bitmap_clear (data->ready_try_change);
27207 /* Filter out insns from ready_try that the core will not be able to issue
27208 on the current cycle due to the decoder. */
27209 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27210 false);
27213 /* Revert the effect on ready_try. */
27214 static void
27215 core2i7_first_cycle_multipass_backtrack (const void *_data,
27216 signed char *ready_try,
27217 int n_ready ATTRIBUTE_UNUSED)
27219 const_ix86_first_cycle_multipass_data_t data
27220 = (const_ix86_first_cycle_multipass_data_t) _data;
27221 unsigned int i = 0;
27222 sbitmap_iterator sbi;
27224 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27225 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27227 ready_try[i] = 0;
27231 /* Save the result of multipass lookahead scheduling for the next round. */
27232 static void
27233 core2i7_first_cycle_multipass_end (const void *_data)
27235 const_ix86_first_cycle_multipass_data_t data
27236 = (const_ix86_first_cycle_multipass_data_t) _data;
27237 ix86_first_cycle_multipass_data_t next_data
27238 = ix86_first_cycle_multipass_data;
27240 if (data != NULL)
27242 next_data->ifetch_block_len = data->ifetch_block_len;
27243 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27247 /* Deallocate target data. */
27248 static void
27249 core2i7_first_cycle_multipass_fini (void *_data)
27251 ix86_first_cycle_multipass_data_t data
27252 = (ix86_first_cycle_multipass_data_t) _data;
27254 if (data->ready_try_change)
27256 sbitmap_free (data->ready_try_change);
27257 data->ready_try_change = NULL;
27258 data->ready_try_change_size = 0;
27262 /* Prepare for scheduling pass. */
27263 static void
27264 ix86_sched_init_global (FILE *, int, int)
27266 /* Install scheduling hooks for the current CPU. Some of these hooks are used
27267 in time-critical parts of the scheduler, so we only set them up when
27268 they are actually used. */
27269 switch (ix86_tune)
27271 case PROCESSOR_CORE2:
27272 case PROCESSOR_NEHALEM:
27273 case PROCESSOR_SANDYBRIDGE:
27274 case PROCESSOR_HASWELL:
27275 /* Do not perform multipass scheduling for pre-reload schedule
27276 to save compile time. */
27277 if (reload_completed)
27279 targetm.sched.dfa_post_advance_cycle
27280 = core2i7_dfa_post_advance_cycle;
27281 targetm.sched.first_cycle_multipass_init
27282 = core2i7_first_cycle_multipass_init;
27283 targetm.sched.first_cycle_multipass_begin
27284 = core2i7_first_cycle_multipass_begin;
27285 targetm.sched.first_cycle_multipass_issue
27286 = core2i7_first_cycle_multipass_issue;
27287 targetm.sched.first_cycle_multipass_backtrack
27288 = core2i7_first_cycle_multipass_backtrack;
27289 targetm.sched.first_cycle_multipass_end
27290 = core2i7_first_cycle_multipass_end;
27291 targetm.sched.first_cycle_multipass_fini
27292 = core2i7_first_cycle_multipass_fini;
27294 /* Set decoder parameters. */
27295 core2i7_secondary_decoder_max_insn_size = 8;
27296 core2i7_ifetch_block_size = 16;
27297 core2i7_ifetch_block_max_insns = 6;
27298 break;
27300 /* ... Fall through ... */
27301 default:
27302 targetm.sched.dfa_post_advance_cycle = NULL;
27303 targetm.sched.first_cycle_multipass_init = NULL;
27304 targetm.sched.first_cycle_multipass_begin = NULL;
27305 targetm.sched.first_cycle_multipass_issue = NULL;
27306 targetm.sched.first_cycle_multipass_backtrack = NULL;
27307 targetm.sched.first_cycle_multipass_end = NULL;
27308 targetm.sched.first_cycle_multipass_fini = NULL;
27309 break;
27314 /* Compute the alignment given to a constant that is being placed in memory.
27315 EXP is the constant and ALIGN is the alignment that the object would
27316 ordinarily have.
27317 The value of this function is used instead of that alignment to align
27318 the object. */
27320 int
27321 ix86_constant_alignment (tree exp, int align)
27323 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27324 || TREE_CODE (exp) == INTEGER_CST)
27326 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27327 return 64;
27328 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27329 return 128;
27331 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27332 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27333 return BITS_PER_WORD;
27335 return align;
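/* A minimal sketch (never compiled, not from the source) of constants the
   hook above affects: the DFmode constant-pool entry below gets 64-bit
   alignment even with -m32, where double itself is only 4-byte aligned, and
   a string constant with TREE_STRING_LENGTH of 31 or more gets BITS_PER_WORD
   alignment unless we optimize for size.  */
#if 0
double
scale (double x)
{
  return x * 1.25;	/* pool entry for 1.25: aligned to 64 bits */
}
#endif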
27338 /* Compute the alignment for a static variable.
27339 TYPE is the data type, and ALIGN is the alignment that
27340 the object would ordinarily have. The value of this function is used
27341 instead of that alignment to align the object. */
27343 int
27344 ix86_data_alignment (tree type, int align, bool opt)
27346 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27347 for symbols from other compilation units or symbols that don't need
27348 to bind locally. In order to preserve some ABI compatibility with
27349 those compilers, ensure we don't decrease alignment from what we
27350 used to assume. */
27352 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27354 /* A data structure whose size is equal to or greater than a cache line
27355 (64 bytes on the Pentium 4 and other recent Intel processors, including
27356 processors based on the Intel Core microarchitecture) should be aligned
27357 so that its base address is a multiple of the cache line size. */
27359 int max_align
27360 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27362 if (max_align < BITS_PER_WORD)
27363 max_align = BITS_PER_WORD;
27365 switch (ix86_align_data_type)
27367 case ix86_align_data_type_abi: opt = false; break;
27368 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27369 case ix86_align_data_type_cacheline: break;
27372 if (opt
27373 && AGGREGATE_TYPE_P (type)
27374 && TYPE_SIZE (type)
27375 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27377 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27378 && align < max_align_compat)
27379 align = max_align_compat;
27380 if (wi::geu_p (TYPE_SIZE (type), max_align)
27381 && align < max_align)
27382 align = max_align;
27385 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27386 to a 16-byte boundary. */
27387 if (TARGET_64BIT)
27389 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27390 && TYPE_SIZE (type)
27391 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27392 && wi::geu_p (TYPE_SIZE (type), 128)
27393 && align < 128)
27394 return 128;
27397 if (!opt)
27398 return align;
27400 if (TREE_CODE (type) == ARRAY_TYPE)
27402 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27403 return 64;
27404 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27405 return 128;
27407 else if (TREE_CODE (type) == COMPLEX_TYPE)
27410 if (TYPE_MODE (type) == DCmode && align < 64)
27411 return 64;
27412 if ((TYPE_MODE (type) == XCmode
27413 || TYPE_MODE (type) == TCmode) && align < 128)
27414 return 128;
27416 else if ((TREE_CODE (type) == RECORD_TYPE
27417 || TREE_CODE (type) == UNION_TYPE
27418 || TREE_CODE (type) == QUAL_UNION_TYPE)
27419 && TYPE_FIELDS (type))
27421 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27422 return 64;
27423 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27424 return 128;
27426 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27427 || TREE_CODE (type) == INTEGER_TYPE)
27429 if (TYPE_MODE (type) == DFmode && align < 64)
27430 return 64;
27431 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27432 return 128;
27435 return align;
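/* A minimal sketch (never compiled) of what the rules above imply for -m64
   with the default -malign-data and no user-specified alignment, assuming a
   typical 64-byte prefetch block:  */
#if 0
static char small_buf[8];	/* below 16 bytes: keeps its natural alignment */
static char abi_buf[64];	/* aggregate >= 16 bytes: at least 16-byte
				   alignment (x86-64 ABI rule) */
static int big_table[1024];	/* large enough to also be raised to the
				   cache-line-based max_align when optimizing */
#endif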
27438 /* Compute the alignment for a local variable or a stack slot. EXP is
27439 the data type or decl itself, MODE is the widest mode available and
27440 ALIGN is the alignment that the object would ordinarily have. The
27441 value of this macro is used instead of that alignment to align the
27442 object. */
27444 unsigned int
27445 ix86_local_alignment (tree exp, machine_mode mode,
27446 unsigned int align)
27448 tree type, decl;
27450 if (exp && DECL_P (exp))
27452 type = TREE_TYPE (exp);
27453 decl = exp;
27455 else
27457 type = exp;
27458 decl = NULL;
27461 /* Don't do dynamic stack realignment for long long objects with
27462 -mpreferred-stack-boundary=2. */
27463 if (!TARGET_64BIT
27464 && align == 64
27465 && ix86_preferred_stack_boundary < 64
27466 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27467 && (!type || !TYPE_USER_ALIGN (type))
27468 && (!decl || !DECL_USER_ALIGN (decl)))
27469 align = 32;
27471 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
27472 register in MODE. We will return the largest alignment of XF
27473 and DF. */
27474 if (!type)
27476 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27477 align = GET_MODE_ALIGNMENT (DFmode);
27478 return align;
27481 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27482 to a 16-byte boundary. The exact wording is:
27484 An array uses the same alignment as its elements, except that a local or
27485 global array variable of length at least 16 bytes or
27486 a C99 variable-length array variable always has alignment of at least 16 bytes.
27488 This was added to allow use of aligned SSE instructions on arrays. The
27489 rule is meant for static storage (where the compiler cannot do the analysis
27490 by itself). We follow it for automatic variables only when convenient.
27491 We fully control everything in the function being compiled, and functions
27492 from other units cannot rely on the alignment.
27494 Exclude the va_list type. It is the common case of a local array where
27495 we cannot benefit from the alignment.
27497 TODO: Probably one should optimize for size only when var is not escaping. */
27498 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27499 && TARGET_SSE)
27501 if (AGGREGATE_TYPE_P (type)
27502 && (va_list_type_node == NULL_TREE
27503 || (TYPE_MAIN_VARIANT (type)
27504 != TYPE_MAIN_VARIANT (va_list_type_node)))
27505 && TYPE_SIZE (type)
27506 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27507 && wi::geu_p (TYPE_SIZE (type), 16)
27508 && align < 128)
27509 return 128;
27511 if (TREE_CODE (type) == ARRAY_TYPE)
27513 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27514 return 64;
27515 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27516 return 128;
27518 else if (TREE_CODE (type) == COMPLEX_TYPE)
27520 if (TYPE_MODE (type) == DCmode && align < 64)
27521 return 64;
27522 if ((TYPE_MODE (type) == XCmode
27523 || TYPE_MODE (type) == TCmode) && align < 128)
27524 return 128;
27526 else if ((TREE_CODE (type) == RECORD_TYPE
27527 || TREE_CODE (type) == UNION_TYPE
27528 || TREE_CODE (type) == QUAL_UNION_TYPE)
27529 && TYPE_FIELDS (type))
27531 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27532 return 64;
27533 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27534 return 128;
27536 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27537 || TREE_CODE (type) == INTEGER_TYPE)
27540 if (TYPE_MODE (type) == DFmode && align < 64)
27541 return 64;
27542 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27543 return 128;
27545 return align;
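/* A minimal sketch (never compiled): with -m64, SSE enabled and optimization
   for speed, the rules above give a local aggregate of at least 16 bytes a
   16-byte stack slot so aligned vector accesses can be used on it, while
   va_list objects are excluded.  */
#if 0
void
example (void)
{
  int v[8];	/* 32 bytes: at least 16-byte alignment */
  char c[8];	/* below 16 bytes: natural alignment */
}
#endif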
27548 /* Compute the minimum required alignment for dynamic stack realignment
27549 purposes for a local variable, parameter or a stack slot. EXP is
27550 the data type or decl itself, MODE is its mode and ALIGN is the
27551 alignment that the object would ordinarily have. */
27553 unsigned int
27554 ix86_minimum_alignment (tree exp, machine_mode mode,
27555 unsigned int align)
27557 tree type, decl;
27559 if (exp && DECL_P (exp))
27561 type = TREE_TYPE (exp);
27562 decl = exp;
27564 else
27566 type = exp;
27567 decl = NULL;
27570 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27571 return align;
27573 /* Don't do dynamic stack realignment for long long objects with
27574 -mpreferred-stack-boundary=2. */
27575 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27576 && (!type || !TYPE_USER_ALIGN (type))
27577 && (!decl || !DECL_USER_ALIGN (decl)))
27578 return 32;
27580 return align;
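/* For example (illustrative): with -m32 -mpreferred-stack-boundary=2, a plain
   "long long" local would normally request 64-bit alignment; the hook above
   reduces the requirement to 32 bits so the variable does not by itself force
   dynamic stack realignment, matching ix86_local_alignment above.  */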
27583 /* Find a location for the static chain incoming to a nested function.
27584 This is a register, unless all free registers are used by arguments. */
27586 static rtx
27587 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27589 unsigned regno;
27591 /* While this function won't be called by the middle-end when a static
27592 chain isn't needed, it's also used throughout the backend so it's
27593 easiest to keep this check centralized. */
27594 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27595 return NULL;
27597 if (TARGET_64BIT)
27599 /* We always use R10 in 64-bit mode. */
27600 regno = R10_REG;
27602 else
27604 const_tree fntype, fndecl;
27605 unsigned int ccvt;
27607 /* By default in 32-bit mode we use ECX to pass the static chain. */
27608 regno = CX_REG;
27610 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27612 fntype = TREE_TYPE (fndecl_or_type);
27613 fndecl = fndecl_or_type;
27615 else
27617 fntype = fndecl_or_type;
27618 fndecl = NULL;
27621 ccvt = ix86_get_callcvt (fntype);
27622 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27624 /* Fastcall functions use ecx/edx for arguments, which leaves
27625 us with EAX for the static chain.
27626 Thiscall functions use ecx for arguments, which also
27627 leaves us with EAX for the static chain. */
27628 regno = AX_REG;
27630 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27632 /* Thiscall functions use ecx for arguments, which leaves
27633 us with EAX and EDX for the static chain.
27634 We use EAX for ABI compatibility. */
27635 regno = AX_REG;
27637 else if (ix86_function_regparm (fntype, fndecl) == 3)
27639 /* For regparm 3, we have no free call-clobbered registers in
27640 which to store the static chain. In order to implement this,
27641 we have the trampoline push the static chain to the stack.
27642 However, we can't push a value below the return address when
27643 we call the nested function directly, so we have to use an
27644 alternate entry point. For this we use ESI, and have the
27645 alternate entry point push ESI, so that things appear the
27646 same once we're executing the nested function. */
27647 if (incoming_p)
27649 if (fndecl == current_function_decl)
27650 ix86_static_chain_on_stack = true;
27651 return gen_frame_mem (SImode,
27652 plus_constant (Pmode,
27653 arg_pointer_rtx, -8));
27655 regno = SI_REG;
27659 return gen_rtx_REG (Pmode, regno);
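/* A minimal sketch (never compiled; GNU C nested-function extension) of code
   that needs a static chain.  Per the hook above it is passed in R10 for
   64-bit code and normally in ECX for 32-bit code (EAX for fastcall or
   thiscall, or on the stack for regparm(3) functions).  */
#if 0
int
outer (int x)
{
  int inner (int y) { return x + y; }	/* refers to outer's frame */
  return inner (1);
}
#endif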
27662 /* Emit RTL insns to initialize the variable parts of a trampoline.
27663 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27664 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27665 to be passed to the target function. */
27667 static void
27668 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27670 rtx mem, fnaddr;
27671 int opcode;
27672 int offset = 0;
27674 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27676 if (TARGET_64BIT)
27678 int size;
27680 /* Load the function address into r11. Try to load the address using
27681 the shorter movl instead of movabs. We may want to support
27682 movq for kernel mode, but the kernel does not use trampolines at
27683 the moment. FNADDR is a 32-bit address and may not be in
27684 DImode when ptr_mode == SImode. Always use movl in this
27685 case. */
27686 if (ptr_mode == SImode
27687 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27689 fnaddr = copy_addr_to_reg (fnaddr);
27691 mem = adjust_address (m_tramp, HImode, offset);
27692 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27694 mem = adjust_address (m_tramp, SImode, offset + 2);
27695 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27696 offset += 6;
27698 else
27700 mem = adjust_address (m_tramp, HImode, offset);
27701 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27703 mem = adjust_address (m_tramp, DImode, offset + 2);
27704 emit_move_insn (mem, fnaddr);
27705 offset += 10;
27708 /* Load the static chain using movabs to r10. Use the shorter movl
27709 instead of movabs when ptr_mode == SImode. */
27710 if (ptr_mode == SImode)
27712 opcode = 0xba41;
27713 size = 6;
27715 else
27717 opcode = 0xba49;
27718 size = 10;
27721 mem = adjust_address (m_tramp, HImode, offset);
27722 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27724 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27725 emit_move_insn (mem, chain_value);
27726 offset += size;
27728 /* Jump to r11; the last (unused) byte is a nop, only there to
27729 pad the write out to a single 32-bit store. */
27730 mem = adjust_address (m_tramp, SImode, offset);
27731 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27732 offset += 4;
27734 else
27736 rtx disp, chain;
27738 /* Depending on the static chain location, either load a register
27739 with a constant, or push the constant to the stack. All of the
27740 instructions are the same size. */
27741 chain = ix86_static_chain (fndecl, true);
27742 if (REG_P (chain))
27744 switch (REGNO (chain))
27746 case AX_REG:
27747 opcode = 0xb8; break;
27748 case CX_REG:
27749 opcode = 0xb9; break;
27750 default:
27751 gcc_unreachable ();
27754 else
27755 opcode = 0x68;
27757 mem = adjust_address (m_tramp, QImode, offset);
27758 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27760 mem = adjust_address (m_tramp, SImode, offset + 1);
27761 emit_move_insn (mem, chain_value);
27762 offset += 5;
27764 mem = adjust_address (m_tramp, QImode, offset);
27765 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27767 mem = adjust_address (m_tramp, SImode, offset + 1);
27769 /* Compute offset from the end of the jmp to the target function.
27770 In the case in which the trampoline stores the static chain on
27771 the stack, we need to skip the first insn which pushes the
27772 (call-saved) register static chain; this push is 1 byte. */
27773 offset += 5;
27774 disp = expand_binop (SImode, sub_optab, fnaddr,
27775 plus_constant (Pmode, XEXP (m_tramp, 0),
27776 offset - (MEM_P (chain) ? 1 : 0)),
27777 NULL_RTX, 1, OPTAB_DIRECT);
27778 emit_move_insn (mem, disp);
27781 gcc_assert (offset <= TRAMPOLINE_SIZE);
27783 #ifdef HAVE_ENABLE_EXECUTE_STACK
27784 #ifdef CHECK_EXECUTE_STACK_ENABLED
27785 if (CHECK_EXECUTE_STACK_ENABLED)
27786 #endif
27787 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27788 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27789 #endif
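/* Byte image written by the 64-bit path above (illustrative; shown for the
   long movabs forms, i.e. ptr_mode == DImode and an address that is not a
   32-bit zero-extended immediate):

     49 bb <8-byte fnaddr>	movabs $fnaddr, %r11
     49 ba <8-byte chain>	movabs $chain,  %r10
     49 ff e3 90		jmp *%r11; nop (padding)

   The 32-bit path instead emits mov $chain, %eax/%ecx (or push $chain)
   followed by a jmp rel32 to the target.  */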
27792 /* The following file contains several enumerations and data structures
27793 built from the definitions in i386-builtin-types.def. */
27795 #include "i386-builtin-types.inc"
27797 /* Table for the ix86 builtin non-function types. */
27798 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27800 /* Retrieve an element from the above table, building some of
27801 the types lazily. */
27803 static tree
27804 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27806 unsigned int index;
27807 tree type, itype;
27809 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27811 type = ix86_builtin_type_tab[(int) tcode];
27812 if (type != NULL)
27813 return type;
27815 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27816 if (tcode <= IX86_BT_LAST_VECT)
27818 machine_mode mode;
27820 index = tcode - IX86_BT_LAST_PRIM - 1;
27821 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27822 mode = ix86_builtin_type_vect_mode[index];
27824 type = build_vector_type_for_mode (itype, mode);
27826 else
27828 int quals;
27830 index = tcode - IX86_BT_LAST_VECT - 1;
27831 if (tcode <= IX86_BT_LAST_PTR)
27832 quals = TYPE_UNQUALIFIED;
27833 else
27834 quals = TYPE_QUAL_CONST;
27836 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27837 if (quals != TYPE_UNQUALIFIED)
27838 itype = build_qualified_type (itype, quals);
27840 type = build_pointer_type (itype);
27843 ix86_builtin_type_tab[(int) tcode] = type;
27844 return type;
27847 /* Table for the ix86 builtin function types. */
27848 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27850 /* Retrieve an element from the above table, building some of
27851 the types lazily. */
27853 static tree
27854 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27856 tree type;
27858 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27860 type = ix86_builtin_func_type_tab[(int) tcode];
27861 if (type != NULL)
27862 return type;
27864 if (tcode <= IX86_BT_LAST_FUNC)
27866 unsigned start = ix86_builtin_func_start[(int) tcode];
27867 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27868 tree rtype, atype, args = void_list_node;
27869 unsigned i;
27871 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27872 for (i = after - 1; i > start; --i)
27874 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27875 args = tree_cons (NULL, atype, args);
27878 type = build_function_type (rtype, args);
27880 else
27882 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27883 enum ix86_builtin_func_type icode;
27885 icode = ix86_builtin_func_alias_base[index];
27886 type = ix86_get_builtin_func_type (icode);
27889 ix86_builtin_func_type_tab[(int) tcode] = type;
27890 return type;
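/* Worked example (hypothetical table entry, for illustration only): if the
   argument codes recorded for a function record were
   { IX86_BT_INT, IX86_BT_FLOAT, IX86_BT_DOUBLE }, the loop above would build
   the tree type  int (float, double)  -- the first code gives the return type
   and the remaining codes give the arguments in order.  Alias codes past
   IX86_BT_LAST_FUNC simply reuse the type of the function they alias.  */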
27894 /* Codes for all the SSE/MMX builtins. */
27895 enum ix86_builtins
27897 IX86_BUILTIN_ADDPS,
27898 IX86_BUILTIN_ADDSS,
27899 IX86_BUILTIN_DIVPS,
27900 IX86_BUILTIN_DIVSS,
27901 IX86_BUILTIN_MULPS,
27902 IX86_BUILTIN_MULSS,
27903 IX86_BUILTIN_SUBPS,
27904 IX86_BUILTIN_SUBSS,
27906 IX86_BUILTIN_CMPEQPS,
27907 IX86_BUILTIN_CMPLTPS,
27908 IX86_BUILTIN_CMPLEPS,
27909 IX86_BUILTIN_CMPGTPS,
27910 IX86_BUILTIN_CMPGEPS,
27911 IX86_BUILTIN_CMPNEQPS,
27912 IX86_BUILTIN_CMPNLTPS,
27913 IX86_BUILTIN_CMPNLEPS,
27914 IX86_BUILTIN_CMPNGTPS,
27915 IX86_BUILTIN_CMPNGEPS,
27916 IX86_BUILTIN_CMPORDPS,
27917 IX86_BUILTIN_CMPUNORDPS,
27918 IX86_BUILTIN_CMPEQSS,
27919 IX86_BUILTIN_CMPLTSS,
27920 IX86_BUILTIN_CMPLESS,
27921 IX86_BUILTIN_CMPNEQSS,
27922 IX86_BUILTIN_CMPNLTSS,
27923 IX86_BUILTIN_CMPNLESS,
27924 IX86_BUILTIN_CMPORDSS,
27925 IX86_BUILTIN_CMPUNORDSS,
27927 IX86_BUILTIN_COMIEQSS,
27928 IX86_BUILTIN_COMILTSS,
27929 IX86_BUILTIN_COMILESS,
27930 IX86_BUILTIN_COMIGTSS,
27931 IX86_BUILTIN_COMIGESS,
27932 IX86_BUILTIN_COMINEQSS,
27933 IX86_BUILTIN_UCOMIEQSS,
27934 IX86_BUILTIN_UCOMILTSS,
27935 IX86_BUILTIN_UCOMILESS,
27936 IX86_BUILTIN_UCOMIGTSS,
27937 IX86_BUILTIN_UCOMIGESS,
27938 IX86_BUILTIN_UCOMINEQSS,
27940 IX86_BUILTIN_CVTPI2PS,
27941 IX86_BUILTIN_CVTPS2PI,
27942 IX86_BUILTIN_CVTSI2SS,
27943 IX86_BUILTIN_CVTSI642SS,
27944 IX86_BUILTIN_CVTSS2SI,
27945 IX86_BUILTIN_CVTSS2SI64,
27946 IX86_BUILTIN_CVTTPS2PI,
27947 IX86_BUILTIN_CVTTSS2SI,
27948 IX86_BUILTIN_CVTTSS2SI64,
27950 IX86_BUILTIN_MAXPS,
27951 IX86_BUILTIN_MAXSS,
27952 IX86_BUILTIN_MINPS,
27953 IX86_BUILTIN_MINSS,
27955 IX86_BUILTIN_LOADUPS,
27956 IX86_BUILTIN_STOREUPS,
27957 IX86_BUILTIN_MOVSS,
27959 IX86_BUILTIN_MOVHLPS,
27960 IX86_BUILTIN_MOVLHPS,
27961 IX86_BUILTIN_LOADHPS,
27962 IX86_BUILTIN_LOADLPS,
27963 IX86_BUILTIN_STOREHPS,
27964 IX86_BUILTIN_STORELPS,
27966 IX86_BUILTIN_MASKMOVQ,
27967 IX86_BUILTIN_MOVMSKPS,
27968 IX86_BUILTIN_PMOVMSKB,
27970 IX86_BUILTIN_MOVNTPS,
27971 IX86_BUILTIN_MOVNTQ,
27973 IX86_BUILTIN_LOADDQU,
27974 IX86_BUILTIN_STOREDQU,
27976 IX86_BUILTIN_PACKSSWB,
27977 IX86_BUILTIN_PACKSSDW,
27978 IX86_BUILTIN_PACKUSWB,
27980 IX86_BUILTIN_PADDB,
27981 IX86_BUILTIN_PADDW,
27982 IX86_BUILTIN_PADDD,
27983 IX86_BUILTIN_PADDQ,
27984 IX86_BUILTIN_PADDSB,
27985 IX86_BUILTIN_PADDSW,
27986 IX86_BUILTIN_PADDUSB,
27987 IX86_BUILTIN_PADDUSW,
27988 IX86_BUILTIN_PSUBB,
27989 IX86_BUILTIN_PSUBW,
27990 IX86_BUILTIN_PSUBD,
27991 IX86_BUILTIN_PSUBQ,
27992 IX86_BUILTIN_PSUBSB,
27993 IX86_BUILTIN_PSUBSW,
27994 IX86_BUILTIN_PSUBUSB,
27995 IX86_BUILTIN_PSUBUSW,
27997 IX86_BUILTIN_PAND,
27998 IX86_BUILTIN_PANDN,
27999 IX86_BUILTIN_POR,
28000 IX86_BUILTIN_PXOR,
28002 IX86_BUILTIN_PAVGB,
28003 IX86_BUILTIN_PAVGW,
28005 IX86_BUILTIN_PCMPEQB,
28006 IX86_BUILTIN_PCMPEQW,
28007 IX86_BUILTIN_PCMPEQD,
28008 IX86_BUILTIN_PCMPGTB,
28009 IX86_BUILTIN_PCMPGTW,
28010 IX86_BUILTIN_PCMPGTD,
28012 IX86_BUILTIN_PMADDWD,
28014 IX86_BUILTIN_PMAXSW,
28015 IX86_BUILTIN_PMAXUB,
28016 IX86_BUILTIN_PMINSW,
28017 IX86_BUILTIN_PMINUB,
28019 IX86_BUILTIN_PMULHUW,
28020 IX86_BUILTIN_PMULHW,
28021 IX86_BUILTIN_PMULLW,
28023 IX86_BUILTIN_PSADBW,
28024 IX86_BUILTIN_PSHUFW,
28026 IX86_BUILTIN_PSLLW,
28027 IX86_BUILTIN_PSLLD,
28028 IX86_BUILTIN_PSLLQ,
28029 IX86_BUILTIN_PSRAW,
28030 IX86_BUILTIN_PSRAD,
28031 IX86_BUILTIN_PSRLW,
28032 IX86_BUILTIN_PSRLD,
28033 IX86_BUILTIN_PSRLQ,
28034 IX86_BUILTIN_PSLLWI,
28035 IX86_BUILTIN_PSLLDI,
28036 IX86_BUILTIN_PSLLQI,
28037 IX86_BUILTIN_PSRAWI,
28038 IX86_BUILTIN_PSRADI,
28039 IX86_BUILTIN_PSRLWI,
28040 IX86_BUILTIN_PSRLDI,
28041 IX86_BUILTIN_PSRLQI,
28043 IX86_BUILTIN_PUNPCKHBW,
28044 IX86_BUILTIN_PUNPCKHWD,
28045 IX86_BUILTIN_PUNPCKHDQ,
28046 IX86_BUILTIN_PUNPCKLBW,
28047 IX86_BUILTIN_PUNPCKLWD,
28048 IX86_BUILTIN_PUNPCKLDQ,
28050 IX86_BUILTIN_SHUFPS,
28052 IX86_BUILTIN_RCPPS,
28053 IX86_BUILTIN_RCPSS,
28054 IX86_BUILTIN_RSQRTPS,
28055 IX86_BUILTIN_RSQRTPS_NR,
28056 IX86_BUILTIN_RSQRTSS,
28057 IX86_BUILTIN_RSQRTF,
28058 IX86_BUILTIN_SQRTPS,
28059 IX86_BUILTIN_SQRTPS_NR,
28060 IX86_BUILTIN_SQRTSS,
28062 IX86_BUILTIN_UNPCKHPS,
28063 IX86_BUILTIN_UNPCKLPS,
28065 IX86_BUILTIN_ANDPS,
28066 IX86_BUILTIN_ANDNPS,
28067 IX86_BUILTIN_ORPS,
28068 IX86_BUILTIN_XORPS,
28070 IX86_BUILTIN_EMMS,
28071 IX86_BUILTIN_LDMXCSR,
28072 IX86_BUILTIN_STMXCSR,
28073 IX86_BUILTIN_SFENCE,
28075 IX86_BUILTIN_FXSAVE,
28076 IX86_BUILTIN_FXRSTOR,
28077 IX86_BUILTIN_FXSAVE64,
28078 IX86_BUILTIN_FXRSTOR64,
28080 IX86_BUILTIN_XSAVE,
28081 IX86_BUILTIN_XRSTOR,
28082 IX86_BUILTIN_XSAVE64,
28083 IX86_BUILTIN_XRSTOR64,
28085 IX86_BUILTIN_XSAVEOPT,
28086 IX86_BUILTIN_XSAVEOPT64,
28088 IX86_BUILTIN_XSAVEC,
28089 IX86_BUILTIN_XSAVEC64,
28091 IX86_BUILTIN_XSAVES,
28092 IX86_BUILTIN_XRSTORS,
28093 IX86_BUILTIN_XSAVES64,
28094 IX86_BUILTIN_XRSTORS64,
28096 /* 3DNow! Original */
28097 IX86_BUILTIN_FEMMS,
28098 IX86_BUILTIN_PAVGUSB,
28099 IX86_BUILTIN_PF2ID,
28100 IX86_BUILTIN_PFACC,
28101 IX86_BUILTIN_PFADD,
28102 IX86_BUILTIN_PFCMPEQ,
28103 IX86_BUILTIN_PFCMPGE,
28104 IX86_BUILTIN_PFCMPGT,
28105 IX86_BUILTIN_PFMAX,
28106 IX86_BUILTIN_PFMIN,
28107 IX86_BUILTIN_PFMUL,
28108 IX86_BUILTIN_PFRCP,
28109 IX86_BUILTIN_PFRCPIT1,
28110 IX86_BUILTIN_PFRCPIT2,
28111 IX86_BUILTIN_PFRSQIT1,
28112 IX86_BUILTIN_PFRSQRT,
28113 IX86_BUILTIN_PFSUB,
28114 IX86_BUILTIN_PFSUBR,
28115 IX86_BUILTIN_PI2FD,
28116 IX86_BUILTIN_PMULHRW,
28118 /* 3DNow! Athlon Extensions */
28119 IX86_BUILTIN_PF2IW,
28120 IX86_BUILTIN_PFNACC,
28121 IX86_BUILTIN_PFPNACC,
28122 IX86_BUILTIN_PI2FW,
28123 IX86_BUILTIN_PSWAPDSI,
28124 IX86_BUILTIN_PSWAPDSF,
28126 /* SSE2 */
28127 IX86_BUILTIN_ADDPD,
28128 IX86_BUILTIN_ADDSD,
28129 IX86_BUILTIN_DIVPD,
28130 IX86_BUILTIN_DIVSD,
28131 IX86_BUILTIN_MULPD,
28132 IX86_BUILTIN_MULSD,
28133 IX86_BUILTIN_SUBPD,
28134 IX86_BUILTIN_SUBSD,
28136 IX86_BUILTIN_CMPEQPD,
28137 IX86_BUILTIN_CMPLTPD,
28138 IX86_BUILTIN_CMPLEPD,
28139 IX86_BUILTIN_CMPGTPD,
28140 IX86_BUILTIN_CMPGEPD,
28141 IX86_BUILTIN_CMPNEQPD,
28142 IX86_BUILTIN_CMPNLTPD,
28143 IX86_BUILTIN_CMPNLEPD,
28144 IX86_BUILTIN_CMPNGTPD,
28145 IX86_BUILTIN_CMPNGEPD,
28146 IX86_BUILTIN_CMPORDPD,
28147 IX86_BUILTIN_CMPUNORDPD,
28148 IX86_BUILTIN_CMPEQSD,
28149 IX86_BUILTIN_CMPLTSD,
28150 IX86_BUILTIN_CMPLESD,
28151 IX86_BUILTIN_CMPNEQSD,
28152 IX86_BUILTIN_CMPNLTSD,
28153 IX86_BUILTIN_CMPNLESD,
28154 IX86_BUILTIN_CMPORDSD,
28155 IX86_BUILTIN_CMPUNORDSD,
28157 IX86_BUILTIN_COMIEQSD,
28158 IX86_BUILTIN_COMILTSD,
28159 IX86_BUILTIN_COMILESD,
28160 IX86_BUILTIN_COMIGTSD,
28161 IX86_BUILTIN_COMIGESD,
28162 IX86_BUILTIN_COMINEQSD,
28163 IX86_BUILTIN_UCOMIEQSD,
28164 IX86_BUILTIN_UCOMILTSD,
28165 IX86_BUILTIN_UCOMILESD,
28166 IX86_BUILTIN_UCOMIGTSD,
28167 IX86_BUILTIN_UCOMIGESD,
28168 IX86_BUILTIN_UCOMINEQSD,
28170 IX86_BUILTIN_MAXPD,
28171 IX86_BUILTIN_MAXSD,
28172 IX86_BUILTIN_MINPD,
28173 IX86_BUILTIN_MINSD,
28175 IX86_BUILTIN_ANDPD,
28176 IX86_BUILTIN_ANDNPD,
28177 IX86_BUILTIN_ORPD,
28178 IX86_BUILTIN_XORPD,
28180 IX86_BUILTIN_SQRTPD,
28181 IX86_BUILTIN_SQRTSD,
28183 IX86_BUILTIN_UNPCKHPD,
28184 IX86_BUILTIN_UNPCKLPD,
28186 IX86_BUILTIN_SHUFPD,
28188 IX86_BUILTIN_LOADUPD,
28189 IX86_BUILTIN_STOREUPD,
28190 IX86_BUILTIN_MOVSD,
28192 IX86_BUILTIN_LOADHPD,
28193 IX86_BUILTIN_LOADLPD,
28195 IX86_BUILTIN_CVTDQ2PD,
28196 IX86_BUILTIN_CVTDQ2PS,
28198 IX86_BUILTIN_CVTPD2DQ,
28199 IX86_BUILTIN_CVTPD2PI,
28200 IX86_BUILTIN_CVTPD2PS,
28201 IX86_BUILTIN_CVTTPD2DQ,
28202 IX86_BUILTIN_CVTTPD2PI,
28204 IX86_BUILTIN_CVTPI2PD,
28205 IX86_BUILTIN_CVTSI2SD,
28206 IX86_BUILTIN_CVTSI642SD,
28208 IX86_BUILTIN_CVTSD2SI,
28209 IX86_BUILTIN_CVTSD2SI64,
28210 IX86_BUILTIN_CVTSD2SS,
28211 IX86_BUILTIN_CVTSS2SD,
28212 IX86_BUILTIN_CVTTSD2SI,
28213 IX86_BUILTIN_CVTTSD2SI64,
28215 IX86_BUILTIN_CVTPS2DQ,
28216 IX86_BUILTIN_CVTPS2PD,
28217 IX86_BUILTIN_CVTTPS2DQ,
28219 IX86_BUILTIN_MOVNTI,
28220 IX86_BUILTIN_MOVNTI64,
28221 IX86_BUILTIN_MOVNTPD,
28222 IX86_BUILTIN_MOVNTDQ,
28224 IX86_BUILTIN_MOVQ128,
28226 /* SSE2 MMX */
28227 IX86_BUILTIN_MASKMOVDQU,
28228 IX86_BUILTIN_MOVMSKPD,
28229 IX86_BUILTIN_PMOVMSKB128,
28231 IX86_BUILTIN_PACKSSWB128,
28232 IX86_BUILTIN_PACKSSDW128,
28233 IX86_BUILTIN_PACKUSWB128,
28235 IX86_BUILTIN_PADDB128,
28236 IX86_BUILTIN_PADDW128,
28237 IX86_BUILTIN_PADDD128,
28238 IX86_BUILTIN_PADDQ128,
28239 IX86_BUILTIN_PADDSB128,
28240 IX86_BUILTIN_PADDSW128,
28241 IX86_BUILTIN_PADDUSB128,
28242 IX86_BUILTIN_PADDUSW128,
28243 IX86_BUILTIN_PSUBB128,
28244 IX86_BUILTIN_PSUBW128,
28245 IX86_BUILTIN_PSUBD128,
28246 IX86_BUILTIN_PSUBQ128,
28247 IX86_BUILTIN_PSUBSB128,
28248 IX86_BUILTIN_PSUBSW128,
28249 IX86_BUILTIN_PSUBUSB128,
28250 IX86_BUILTIN_PSUBUSW128,
28252 IX86_BUILTIN_PAND128,
28253 IX86_BUILTIN_PANDN128,
28254 IX86_BUILTIN_POR128,
28255 IX86_BUILTIN_PXOR128,
28257 IX86_BUILTIN_PAVGB128,
28258 IX86_BUILTIN_PAVGW128,
28260 IX86_BUILTIN_PCMPEQB128,
28261 IX86_BUILTIN_PCMPEQW128,
28262 IX86_BUILTIN_PCMPEQD128,
28263 IX86_BUILTIN_PCMPGTB128,
28264 IX86_BUILTIN_PCMPGTW128,
28265 IX86_BUILTIN_PCMPGTD128,
28267 IX86_BUILTIN_PMADDWD128,
28269 IX86_BUILTIN_PMAXSW128,
28270 IX86_BUILTIN_PMAXUB128,
28271 IX86_BUILTIN_PMINSW128,
28272 IX86_BUILTIN_PMINUB128,
28274 IX86_BUILTIN_PMULUDQ,
28275 IX86_BUILTIN_PMULUDQ128,
28276 IX86_BUILTIN_PMULHUW128,
28277 IX86_BUILTIN_PMULHW128,
28278 IX86_BUILTIN_PMULLW128,
28280 IX86_BUILTIN_PSADBW128,
28281 IX86_BUILTIN_PSHUFHW,
28282 IX86_BUILTIN_PSHUFLW,
28283 IX86_BUILTIN_PSHUFD,
28285 IX86_BUILTIN_PSLLDQI128,
28286 IX86_BUILTIN_PSLLWI128,
28287 IX86_BUILTIN_PSLLDI128,
28288 IX86_BUILTIN_PSLLQI128,
28289 IX86_BUILTIN_PSRAWI128,
28290 IX86_BUILTIN_PSRADI128,
28291 IX86_BUILTIN_PSRLDQI128,
28292 IX86_BUILTIN_PSRLWI128,
28293 IX86_BUILTIN_PSRLDI128,
28294 IX86_BUILTIN_PSRLQI128,
28296 IX86_BUILTIN_PSLLDQ128,
28297 IX86_BUILTIN_PSLLW128,
28298 IX86_BUILTIN_PSLLD128,
28299 IX86_BUILTIN_PSLLQ128,
28300 IX86_BUILTIN_PSRAW128,
28301 IX86_BUILTIN_PSRAD128,
28302 IX86_BUILTIN_PSRLW128,
28303 IX86_BUILTIN_PSRLD128,
28304 IX86_BUILTIN_PSRLQ128,
28306 IX86_BUILTIN_PUNPCKHBW128,
28307 IX86_BUILTIN_PUNPCKHWD128,
28308 IX86_BUILTIN_PUNPCKHDQ128,
28309 IX86_BUILTIN_PUNPCKHQDQ128,
28310 IX86_BUILTIN_PUNPCKLBW128,
28311 IX86_BUILTIN_PUNPCKLWD128,
28312 IX86_BUILTIN_PUNPCKLDQ128,
28313 IX86_BUILTIN_PUNPCKLQDQ128,
28315 IX86_BUILTIN_CLFLUSH,
28316 IX86_BUILTIN_MFENCE,
28317 IX86_BUILTIN_LFENCE,
28318 IX86_BUILTIN_PAUSE,
28320 IX86_BUILTIN_FNSTENV,
28321 IX86_BUILTIN_FLDENV,
28322 IX86_BUILTIN_FNSTSW,
28323 IX86_BUILTIN_FNCLEX,
28325 IX86_BUILTIN_BSRSI,
28326 IX86_BUILTIN_BSRDI,
28327 IX86_BUILTIN_RDPMC,
28328 IX86_BUILTIN_RDTSC,
28329 IX86_BUILTIN_RDTSCP,
28330 IX86_BUILTIN_ROLQI,
28331 IX86_BUILTIN_ROLHI,
28332 IX86_BUILTIN_RORQI,
28333 IX86_BUILTIN_RORHI,
28335 /* SSE3. */
28336 IX86_BUILTIN_ADDSUBPS,
28337 IX86_BUILTIN_HADDPS,
28338 IX86_BUILTIN_HSUBPS,
28339 IX86_BUILTIN_MOVSHDUP,
28340 IX86_BUILTIN_MOVSLDUP,
28341 IX86_BUILTIN_ADDSUBPD,
28342 IX86_BUILTIN_HADDPD,
28343 IX86_BUILTIN_HSUBPD,
28344 IX86_BUILTIN_LDDQU,
28346 IX86_BUILTIN_MONITOR,
28347 IX86_BUILTIN_MWAIT,
28349 /* SSSE3. */
28350 IX86_BUILTIN_PHADDW,
28351 IX86_BUILTIN_PHADDD,
28352 IX86_BUILTIN_PHADDSW,
28353 IX86_BUILTIN_PHSUBW,
28354 IX86_BUILTIN_PHSUBD,
28355 IX86_BUILTIN_PHSUBSW,
28356 IX86_BUILTIN_PMADDUBSW,
28357 IX86_BUILTIN_PMULHRSW,
28358 IX86_BUILTIN_PSHUFB,
28359 IX86_BUILTIN_PSIGNB,
28360 IX86_BUILTIN_PSIGNW,
28361 IX86_BUILTIN_PSIGND,
28362 IX86_BUILTIN_PALIGNR,
28363 IX86_BUILTIN_PABSB,
28364 IX86_BUILTIN_PABSW,
28365 IX86_BUILTIN_PABSD,
28367 IX86_BUILTIN_PHADDW128,
28368 IX86_BUILTIN_PHADDD128,
28369 IX86_BUILTIN_PHADDSW128,
28370 IX86_BUILTIN_PHSUBW128,
28371 IX86_BUILTIN_PHSUBD128,
28372 IX86_BUILTIN_PHSUBSW128,
28373 IX86_BUILTIN_PMADDUBSW128,
28374 IX86_BUILTIN_PMULHRSW128,
28375 IX86_BUILTIN_PSHUFB128,
28376 IX86_BUILTIN_PSIGNB128,
28377 IX86_BUILTIN_PSIGNW128,
28378 IX86_BUILTIN_PSIGND128,
28379 IX86_BUILTIN_PALIGNR128,
28380 IX86_BUILTIN_PABSB128,
28381 IX86_BUILTIN_PABSW128,
28382 IX86_BUILTIN_PABSD128,
28384 /* AMDFAM10 - SSE4A New Instructions. */
28385 IX86_BUILTIN_MOVNTSD,
28386 IX86_BUILTIN_MOVNTSS,
28387 IX86_BUILTIN_EXTRQI,
28388 IX86_BUILTIN_EXTRQ,
28389 IX86_BUILTIN_INSERTQI,
28390 IX86_BUILTIN_INSERTQ,
28392 /* SSE4.1. */
28393 IX86_BUILTIN_BLENDPD,
28394 IX86_BUILTIN_BLENDPS,
28395 IX86_BUILTIN_BLENDVPD,
28396 IX86_BUILTIN_BLENDVPS,
28397 IX86_BUILTIN_PBLENDVB128,
28398 IX86_BUILTIN_PBLENDW128,
28400 IX86_BUILTIN_DPPD,
28401 IX86_BUILTIN_DPPS,
28403 IX86_BUILTIN_INSERTPS128,
28405 IX86_BUILTIN_MOVNTDQA,
28406 IX86_BUILTIN_MPSADBW128,
28407 IX86_BUILTIN_PACKUSDW128,
28408 IX86_BUILTIN_PCMPEQQ,
28409 IX86_BUILTIN_PHMINPOSUW128,
28411 IX86_BUILTIN_PMAXSB128,
28412 IX86_BUILTIN_PMAXSD128,
28413 IX86_BUILTIN_PMAXUD128,
28414 IX86_BUILTIN_PMAXUW128,
28416 IX86_BUILTIN_PMINSB128,
28417 IX86_BUILTIN_PMINSD128,
28418 IX86_BUILTIN_PMINUD128,
28419 IX86_BUILTIN_PMINUW128,
28421 IX86_BUILTIN_PMOVSXBW128,
28422 IX86_BUILTIN_PMOVSXBD128,
28423 IX86_BUILTIN_PMOVSXBQ128,
28424 IX86_BUILTIN_PMOVSXWD128,
28425 IX86_BUILTIN_PMOVSXWQ128,
28426 IX86_BUILTIN_PMOVSXDQ128,
28428 IX86_BUILTIN_PMOVZXBW128,
28429 IX86_BUILTIN_PMOVZXBD128,
28430 IX86_BUILTIN_PMOVZXBQ128,
28431 IX86_BUILTIN_PMOVZXWD128,
28432 IX86_BUILTIN_PMOVZXWQ128,
28433 IX86_BUILTIN_PMOVZXDQ128,
28435 IX86_BUILTIN_PMULDQ128,
28436 IX86_BUILTIN_PMULLD128,
28438 IX86_BUILTIN_ROUNDSD,
28439 IX86_BUILTIN_ROUNDSS,
28441 IX86_BUILTIN_ROUNDPD,
28442 IX86_BUILTIN_ROUNDPS,
28444 IX86_BUILTIN_FLOORPD,
28445 IX86_BUILTIN_CEILPD,
28446 IX86_BUILTIN_TRUNCPD,
28447 IX86_BUILTIN_RINTPD,
28448 IX86_BUILTIN_ROUNDPD_AZ,
28450 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28451 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28452 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28454 IX86_BUILTIN_FLOORPS,
28455 IX86_BUILTIN_CEILPS,
28456 IX86_BUILTIN_TRUNCPS,
28457 IX86_BUILTIN_RINTPS,
28458 IX86_BUILTIN_ROUNDPS_AZ,
28460 IX86_BUILTIN_FLOORPS_SFIX,
28461 IX86_BUILTIN_CEILPS_SFIX,
28462 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28464 IX86_BUILTIN_PTESTZ,
28465 IX86_BUILTIN_PTESTC,
28466 IX86_BUILTIN_PTESTNZC,
28468 IX86_BUILTIN_VEC_INIT_V2SI,
28469 IX86_BUILTIN_VEC_INIT_V4HI,
28470 IX86_BUILTIN_VEC_INIT_V8QI,
28471 IX86_BUILTIN_VEC_EXT_V2DF,
28472 IX86_BUILTIN_VEC_EXT_V2DI,
28473 IX86_BUILTIN_VEC_EXT_V4SF,
28474 IX86_BUILTIN_VEC_EXT_V4SI,
28475 IX86_BUILTIN_VEC_EXT_V8HI,
28476 IX86_BUILTIN_VEC_EXT_V2SI,
28477 IX86_BUILTIN_VEC_EXT_V4HI,
28478 IX86_BUILTIN_VEC_EXT_V16QI,
28479 IX86_BUILTIN_VEC_SET_V2DI,
28480 IX86_BUILTIN_VEC_SET_V4SF,
28481 IX86_BUILTIN_VEC_SET_V4SI,
28482 IX86_BUILTIN_VEC_SET_V8HI,
28483 IX86_BUILTIN_VEC_SET_V4HI,
28484 IX86_BUILTIN_VEC_SET_V16QI,
28486 IX86_BUILTIN_VEC_PACK_SFIX,
28487 IX86_BUILTIN_VEC_PACK_SFIX256,
28489 /* SSE4.2. */
28490 IX86_BUILTIN_CRC32QI,
28491 IX86_BUILTIN_CRC32HI,
28492 IX86_BUILTIN_CRC32SI,
28493 IX86_BUILTIN_CRC32DI,
28495 IX86_BUILTIN_PCMPESTRI128,
28496 IX86_BUILTIN_PCMPESTRM128,
28497 IX86_BUILTIN_PCMPESTRA128,
28498 IX86_BUILTIN_PCMPESTRC128,
28499 IX86_BUILTIN_PCMPESTRO128,
28500 IX86_BUILTIN_PCMPESTRS128,
28501 IX86_BUILTIN_PCMPESTRZ128,
28502 IX86_BUILTIN_PCMPISTRI128,
28503 IX86_BUILTIN_PCMPISTRM128,
28504 IX86_BUILTIN_PCMPISTRA128,
28505 IX86_BUILTIN_PCMPISTRC128,
28506 IX86_BUILTIN_PCMPISTRO128,
28507 IX86_BUILTIN_PCMPISTRS128,
28508 IX86_BUILTIN_PCMPISTRZ128,
28510 IX86_BUILTIN_PCMPGTQ,
28512 /* AES instructions. */
28513 IX86_BUILTIN_AESENC128,
28514 IX86_BUILTIN_AESENCLAST128,
28515 IX86_BUILTIN_AESDEC128,
28516 IX86_BUILTIN_AESDECLAST128,
28517 IX86_BUILTIN_AESIMC128,
28518 IX86_BUILTIN_AESKEYGENASSIST128,
28520 /* PCLMUL instruction. */
28521 IX86_BUILTIN_PCLMULQDQ128,
28523 /* AVX. */
28524 IX86_BUILTIN_ADDPD256,
28525 IX86_BUILTIN_ADDPS256,
28526 IX86_BUILTIN_ADDSUBPD256,
28527 IX86_BUILTIN_ADDSUBPS256,
28528 IX86_BUILTIN_ANDPD256,
28529 IX86_BUILTIN_ANDPS256,
28530 IX86_BUILTIN_ANDNPD256,
28531 IX86_BUILTIN_ANDNPS256,
28532 IX86_BUILTIN_BLENDPD256,
28533 IX86_BUILTIN_BLENDPS256,
28534 IX86_BUILTIN_BLENDVPD256,
28535 IX86_BUILTIN_BLENDVPS256,
28536 IX86_BUILTIN_DIVPD256,
28537 IX86_BUILTIN_DIVPS256,
28538 IX86_BUILTIN_DPPS256,
28539 IX86_BUILTIN_HADDPD256,
28540 IX86_BUILTIN_HADDPS256,
28541 IX86_BUILTIN_HSUBPD256,
28542 IX86_BUILTIN_HSUBPS256,
28543 IX86_BUILTIN_MAXPD256,
28544 IX86_BUILTIN_MAXPS256,
28545 IX86_BUILTIN_MINPD256,
28546 IX86_BUILTIN_MINPS256,
28547 IX86_BUILTIN_MULPD256,
28548 IX86_BUILTIN_MULPS256,
28549 IX86_BUILTIN_ORPD256,
28550 IX86_BUILTIN_ORPS256,
28551 IX86_BUILTIN_SHUFPD256,
28552 IX86_BUILTIN_SHUFPS256,
28553 IX86_BUILTIN_SUBPD256,
28554 IX86_BUILTIN_SUBPS256,
28555 IX86_BUILTIN_XORPD256,
28556 IX86_BUILTIN_XORPS256,
28557 IX86_BUILTIN_CMPSD,
28558 IX86_BUILTIN_CMPSS,
28559 IX86_BUILTIN_CMPPD,
28560 IX86_BUILTIN_CMPPS,
28561 IX86_BUILTIN_CMPPD256,
28562 IX86_BUILTIN_CMPPS256,
28563 IX86_BUILTIN_CVTDQ2PD256,
28564 IX86_BUILTIN_CVTDQ2PS256,
28565 IX86_BUILTIN_CVTPD2PS256,
28566 IX86_BUILTIN_CVTPS2DQ256,
28567 IX86_BUILTIN_CVTPS2PD256,
28568 IX86_BUILTIN_CVTTPD2DQ256,
28569 IX86_BUILTIN_CVTPD2DQ256,
28570 IX86_BUILTIN_CVTTPS2DQ256,
28571 IX86_BUILTIN_EXTRACTF128PD256,
28572 IX86_BUILTIN_EXTRACTF128PS256,
28573 IX86_BUILTIN_EXTRACTF128SI256,
28574 IX86_BUILTIN_VZEROALL,
28575 IX86_BUILTIN_VZEROUPPER,
28576 IX86_BUILTIN_VPERMILVARPD,
28577 IX86_BUILTIN_VPERMILVARPS,
28578 IX86_BUILTIN_VPERMILVARPD256,
28579 IX86_BUILTIN_VPERMILVARPS256,
28580 IX86_BUILTIN_VPERMILPD,
28581 IX86_BUILTIN_VPERMILPS,
28582 IX86_BUILTIN_VPERMILPD256,
28583 IX86_BUILTIN_VPERMILPS256,
28584 IX86_BUILTIN_VPERMIL2PD,
28585 IX86_BUILTIN_VPERMIL2PS,
28586 IX86_BUILTIN_VPERMIL2PD256,
28587 IX86_BUILTIN_VPERMIL2PS256,
28588 IX86_BUILTIN_VPERM2F128PD256,
28589 IX86_BUILTIN_VPERM2F128PS256,
28590 IX86_BUILTIN_VPERM2F128SI256,
28591 IX86_BUILTIN_VBROADCASTSS,
28592 IX86_BUILTIN_VBROADCASTSD256,
28593 IX86_BUILTIN_VBROADCASTSS256,
28594 IX86_BUILTIN_VBROADCASTPD256,
28595 IX86_BUILTIN_VBROADCASTPS256,
28596 IX86_BUILTIN_VINSERTF128PD256,
28597 IX86_BUILTIN_VINSERTF128PS256,
28598 IX86_BUILTIN_VINSERTF128SI256,
28599 IX86_BUILTIN_LOADUPD256,
28600 IX86_BUILTIN_LOADUPS256,
28601 IX86_BUILTIN_STOREUPD256,
28602 IX86_BUILTIN_STOREUPS256,
28603 IX86_BUILTIN_LDDQU256,
28604 IX86_BUILTIN_MOVNTDQ256,
28605 IX86_BUILTIN_MOVNTPD256,
28606 IX86_BUILTIN_MOVNTPS256,
28607 IX86_BUILTIN_LOADDQU256,
28608 IX86_BUILTIN_STOREDQU256,
28609 IX86_BUILTIN_MASKLOADPD,
28610 IX86_BUILTIN_MASKLOADPS,
28611 IX86_BUILTIN_MASKSTOREPD,
28612 IX86_BUILTIN_MASKSTOREPS,
28613 IX86_BUILTIN_MASKLOADPD256,
28614 IX86_BUILTIN_MASKLOADPS256,
28615 IX86_BUILTIN_MASKSTOREPD256,
28616 IX86_BUILTIN_MASKSTOREPS256,
28617 IX86_BUILTIN_MOVSHDUP256,
28618 IX86_BUILTIN_MOVSLDUP256,
28619 IX86_BUILTIN_MOVDDUP256,
28621 IX86_BUILTIN_SQRTPD256,
28622 IX86_BUILTIN_SQRTPS256,
28623 IX86_BUILTIN_SQRTPS_NR256,
28624 IX86_BUILTIN_RSQRTPS256,
28625 IX86_BUILTIN_RSQRTPS_NR256,
28627 IX86_BUILTIN_RCPPS256,
28629 IX86_BUILTIN_ROUNDPD256,
28630 IX86_BUILTIN_ROUNDPS256,
28632 IX86_BUILTIN_FLOORPD256,
28633 IX86_BUILTIN_CEILPD256,
28634 IX86_BUILTIN_TRUNCPD256,
28635 IX86_BUILTIN_RINTPD256,
28636 IX86_BUILTIN_ROUNDPD_AZ256,
28638 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28639 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28640 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28642 IX86_BUILTIN_FLOORPS256,
28643 IX86_BUILTIN_CEILPS256,
28644 IX86_BUILTIN_TRUNCPS256,
28645 IX86_BUILTIN_RINTPS256,
28646 IX86_BUILTIN_ROUNDPS_AZ256,
28648 IX86_BUILTIN_FLOORPS_SFIX256,
28649 IX86_BUILTIN_CEILPS_SFIX256,
28650 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28652 IX86_BUILTIN_UNPCKHPD256,
28653 IX86_BUILTIN_UNPCKLPD256,
28654 IX86_BUILTIN_UNPCKHPS256,
28655 IX86_BUILTIN_UNPCKLPS256,
28657 IX86_BUILTIN_SI256_SI,
28658 IX86_BUILTIN_PS256_PS,
28659 IX86_BUILTIN_PD256_PD,
28660 IX86_BUILTIN_SI_SI256,
28661 IX86_BUILTIN_PS_PS256,
28662 IX86_BUILTIN_PD_PD256,
28664 IX86_BUILTIN_VTESTZPD,
28665 IX86_BUILTIN_VTESTCPD,
28666 IX86_BUILTIN_VTESTNZCPD,
28667 IX86_BUILTIN_VTESTZPS,
28668 IX86_BUILTIN_VTESTCPS,
28669 IX86_BUILTIN_VTESTNZCPS,
28670 IX86_BUILTIN_VTESTZPD256,
28671 IX86_BUILTIN_VTESTCPD256,
28672 IX86_BUILTIN_VTESTNZCPD256,
28673 IX86_BUILTIN_VTESTZPS256,
28674 IX86_BUILTIN_VTESTCPS256,
28675 IX86_BUILTIN_VTESTNZCPS256,
28676 IX86_BUILTIN_PTESTZ256,
28677 IX86_BUILTIN_PTESTC256,
28678 IX86_BUILTIN_PTESTNZC256,
28680 IX86_BUILTIN_MOVMSKPD256,
28681 IX86_BUILTIN_MOVMSKPS256,
28683 /* AVX2. */
28684 IX86_BUILTIN_MPSADBW256,
28685 IX86_BUILTIN_PABSB256,
28686 IX86_BUILTIN_PABSW256,
28687 IX86_BUILTIN_PABSD256,
28688 IX86_BUILTIN_PACKSSDW256,
28689 IX86_BUILTIN_PACKSSWB256,
28690 IX86_BUILTIN_PACKUSDW256,
28691 IX86_BUILTIN_PACKUSWB256,
28692 IX86_BUILTIN_PADDB256,
28693 IX86_BUILTIN_PADDW256,
28694 IX86_BUILTIN_PADDD256,
28695 IX86_BUILTIN_PADDQ256,
28696 IX86_BUILTIN_PADDSB256,
28697 IX86_BUILTIN_PADDSW256,
28698 IX86_BUILTIN_PADDUSB256,
28699 IX86_BUILTIN_PADDUSW256,
28700 IX86_BUILTIN_PALIGNR256,
28701 IX86_BUILTIN_AND256I,
28702 IX86_BUILTIN_ANDNOT256I,
28703 IX86_BUILTIN_PAVGB256,
28704 IX86_BUILTIN_PAVGW256,
28705 IX86_BUILTIN_PBLENDVB256,
28706 IX86_BUILTIN_PBLENDVW256,
28707 IX86_BUILTIN_PCMPEQB256,
28708 IX86_BUILTIN_PCMPEQW256,
28709 IX86_BUILTIN_PCMPEQD256,
28710 IX86_BUILTIN_PCMPEQQ256,
28711 IX86_BUILTIN_PCMPGTB256,
28712 IX86_BUILTIN_PCMPGTW256,
28713 IX86_BUILTIN_PCMPGTD256,
28714 IX86_BUILTIN_PCMPGTQ256,
28715 IX86_BUILTIN_PHADDW256,
28716 IX86_BUILTIN_PHADDD256,
28717 IX86_BUILTIN_PHADDSW256,
28718 IX86_BUILTIN_PHSUBW256,
28719 IX86_BUILTIN_PHSUBD256,
28720 IX86_BUILTIN_PHSUBSW256,
28721 IX86_BUILTIN_PMADDUBSW256,
28722 IX86_BUILTIN_PMADDWD256,
28723 IX86_BUILTIN_PMAXSB256,
28724 IX86_BUILTIN_PMAXSW256,
28725 IX86_BUILTIN_PMAXSD256,
28726 IX86_BUILTIN_PMAXUB256,
28727 IX86_BUILTIN_PMAXUW256,
28728 IX86_BUILTIN_PMAXUD256,
28729 IX86_BUILTIN_PMINSB256,
28730 IX86_BUILTIN_PMINSW256,
28731 IX86_BUILTIN_PMINSD256,
28732 IX86_BUILTIN_PMINUB256,
28733 IX86_BUILTIN_PMINUW256,
28734 IX86_BUILTIN_PMINUD256,
28735 IX86_BUILTIN_PMOVMSKB256,
28736 IX86_BUILTIN_PMOVSXBW256,
28737 IX86_BUILTIN_PMOVSXBD256,
28738 IX86_BUILTIN_PMOVSXBQ256,
28739 IX86_BUILTIN_PMOVSXWD256,
28740 IX86_BUILTIN_PMOVSXWQ256,
28741 IX86_BUILTIN_PMOVSXDQ256,
28742 IX86_BUILTIN_PMOVZXBW256,
28743 IX86_BUILTIN_PMOVZXBD256,
28744 IX86_BUILTIN_PMOVZXBQ256,
28745 IX86_BUILTIN_PMOVZXWD256,
28746 IX86_BUILTIN_PMOVZXWQ256,
28747 IX86_BUILTIN_PMOVZXDQ256,
28748 IX86_BUILTIN_PMULDQ256,
28749 IX86_BUILTIN_PMULHRSW256,
28750 IX86_BUILTIN_PMULHUW256,
28751 IX86_BUILTIN_PMULHW256,
28752 IX86_BUILTIN_PMULLW256,
28753 IX86_BUILTIN_PMULLD256,
28754 IX86_BUILTIN_PMULUDQ256,
28755 IX86_BUILTIN_POR256,
28756 IX86_BUILTIN_PSADBW256,
28757 IX86_BUILTIN_PSHUFB256,
28758 IX86_BUILTIN_PSHUFD256,
28759 IX86_BUILTIN_PSHUFHW256,
28760 IX86_BUILTIN_PSHUFLW256,
28761 IX86_BUILTIN_PSIGNB256,
28762 IX86_BUILTIN_PSIGNW256,
28763 IX86_BUILTIN_PSIGND256,
28764 IX86_BUILTIN_PSLLDQI256,
28765 IX86_BUILTIN_PSLLWI256,
28766 IX86_BUILTIN_PSLLW256,
28767 IX86_BUILTIN_PSLLDI256,
28768 IX86_BUILTIN_PSLLD256,
28769 IX86_BUILTIN_PSLLQI256,
28770 IX86_BUILTIN_PSLLQ256,
28771 IX86_BUILTIN_PSRAWI256,
28772 IX86_BUILTIN_PSRAW256,
28773 IX86_BUILTIN_PSRADI256,
28774 IX86_BUILTIN_PSRAD256,
28775 IX86_BUILTIN_PSRLDQI256,
28776 IX86_BUILTIN_PSRLWI256,
28777 IX86_BUILTIN_PSRLW256,
28778 IX86_BUILTIN_PSRLDI256,
28779 IX86_BUILTIN_PSRLD256,
28780 IX86_BUILTIN_PSRLQI256,
28781 IX86_BUILTIN_PSRLQ256,
28782 IX86_BUILTIN_PSUBB256,
28783 IX86_BUILTIN_PSUBW256,
28784 IX86_BUILTIN_PSUBD256,
28785 IX86_BUILTIN_PSUBQ256,
28786 IX86_BUILTIN_PSUBSB256,
28787 IX86_BUILTIN_PSUBSW256,
28788 IX86_BUILTIN_PSUBUSB256,
28789 IX86_BUILTIN_PSUBUSW256,
28790 IX86_BUILTIN_PUNPCKHBW256,
28791 IX86_BUILTIN_PUNPCKHWD256,
28792 IX86_BUILTIN_PUNPCKHDQ256,
28793 IX86_BUILTIN_PUNPCKHQDQ256,
28794 IX86_BUILTIN_PUNPCKLBW256,
28795 IX86_BUILTIN_PUNPCKLWD256,
28796 IX86_BUILTIN_PUNPCKLDQ256,
28797 IX86_BUILTIN_PUNPCKLQDQ256,
28798 IX86_BUILTIN_PXOR256,
28799 IX86_BUILTIN_MOVNTDQA256,
28800 IX86_BUILTIN_VBROADCASTSS_PS,
28801 IX86_BUILTIN_VBROADCASTSS_PS256,
28802 IX86_BUILTIN_VBROADCASTSD_PD256,
28803 IX86_BUILTIN_VBROADCASTSI256,
28804 IX86_BUILTIN_PBLENDD256,
28805 IX86_BUILTIN_PBLENDD128,
28806 IX86_BUILTIN_PBROADCASTB256,
28807 IX86_BUILTIN_PBROADCASTW256,
28808 IX86_BUILTIN_PBROADCASTD256,
28809 IX86_BUILTIN_PBROADCASTQ256,
28810 IX86_BUILTIN_PBROADCASTB128,
28811 IX86_BUILTIN_PBROADCASTW128,
28812 IX86_BUILTIN_PBROADCASTD128,
28813 IX86_BUILTIN_PBROADCASTQ128,
28814 IX86_BUILTIN_VPERMVARSI256,
28815 IX86_BUILTIN_VPERMDF256,
28816 IX86_BUILTIN_VPERMVARSF256,
28817 IX86_BUILTIN_VPERMDI256,
28818 IX86_BUILTIN_VPERMTI256,
28819 IX86_BUILTIN_VEXTRACT128I256,
28820 IX86_BUILTIN_VINSERT128I256,
28821 IX86_BUILTIN_MASKLOADD,
28822 IX86_BUILTIN_MASKLOADQ,
28823 IX86_BUILTIN_MASKLOADD256,
28824 IX86_BUILTIN_MASKLOADQ256,
28825 IX86_BUILTIN_MASKSTORED,
28826 IX86_BUILTIN_MASKSTOREQ,
28827 IX86_BUILTIN_MASKSTORED256,
28828 IX86_BUILTIN_MASKSTOREQ256,
28829 IX86_BUILTIN_PSLLVV4DI,
28830 IX86_BUILTIN_PSLLVV2DI,
28831 IX86_BUILTIN_PSLLVV8SI,
28832 IX86_BUILTIN_PSLLVV4SI,
28833 IX86_BUILTIN_PSRAVV8SI,
28834 IX86_BUILTIN_PSRAVV4SI,
28835 IX86_BUILTIN_PSRLVV4DI,
28836 IX86_BUILTIN_PSRLVV2DI,
28837 IX86_BUILTIN_PSRLVV8SI,
28838 IX86_BUILTIN_PSRLVV4SI,
28840 IX86_BUILTIN_GATHERSIV2DF,
28841 IX86_BUILTIN_GATHERSIV4DF,
28842 IX86_BUILTIN_GATHERDIV2DF,
28843 IX86_BUILTIN_GATHERDIV4DF,
28844 IX86_BUILTIN_GATHERSIV4SF,
28845 IX86_BUILTIN_GATHERSIV8SF,
28846 IX86_BUILTIN_GATHERDIV4SF,
28847 IX86_BUILTIN_GATHERDIV8SF,
28848 IX86_BUILTIN_GATHERSIV2DI,
28849 IX86_BUILTIN_GATHERSIV4DI,
28850 IX86_BUILTIN_GATHERDIV2DI,
28851 IX86_BUILTIN_GATHERDIV4DI,
28852 IX86_BUILTIN_GATHERSIV4SI,
28853 IX86_BUILTIN_GATHERSIV8SI,
28854 IX86_BUILTIN_GATHERDIV4SI,
28855 IX86_BUILTIN_GATHERDIV8SI,
28857 /* AVX512F. */
28858 IX86_BUILTIN_SI512_SI256,
28859 IX86_BUILTIN_PD512_PD256,
28860 IX86_BUILTIN_PS512_PS256,
28861 IX86_BUILTIN_SI512_SI,
28862 IX86_BUILTIN_PD512_PD,
28863 IX86_BUILTIN_PS512_PS,
28864 IX86_BUILTIN_ADDPD512,
28865 IX86_BUILTIN_ADDPS512,
28866 IX86_BUILTIN_ADDSD_ROUND,
28867 IX86_BUILTIN_ADDSS_ROUND,
28868 IX86_BUILTIN_ALIGND512,
28869 IX86_BUILTIN_ALIGNQ512,
28870 IX86_BUILTIN_BLENDMD512,
28871 IX86_BUILTIN_BLENDMPD512,
28872 IX86_BUILTIN_BLENDMPS512,
28873 IX86_BUILTIN_BLENDMQ512,
28874 IX86_BUILTIN_BROADCASTF32X4_512,
28875 IX86_BUILTIN_BROADCASTF64X4_512,
28876 IX86_BUILTIN_BROADCASTI32X4_512,
28877 IX86_BUILTIN_BROADCASTI64X4_512,
28878 IX86_BUILTIN_BROADCASTSD512,
28879 IX86_BUILTIN_BROADCASTSS512,
28880 IX86_BUILTIN_CMPD512,
28881 IX86_BUILTIN_CMPPD512,
28882 IX86_BUILTIN_CMPPS512,
28883 IX86_BUILTIN_CMPQ512,
28884 IX86_BUILTIN_CMPSD_MASK,
28885 IX86_BUILTIN_CMPSS_MASK,
28886 IX86_BUILTIN_COMIDF,
28887 IX86_BUILTIN_COMISF,
28888 IX86_BUILTIN_COMPRESSPD512,
28889 IX86_BUILTIN_COMPRESSPDSTORE512,
28890 IX86_BUILTIN_COMPRESSPS512,
28891 IX86_BUILTIN_COMPRESSPSSTORE512,
28892 IX86_BUILTIN_CVTDQ2PD512,
28893 IX86_BUILTIN_CVTDQ2PS512,
28894 IX86_BUILTIN_CVTPD2DQ512,
28895 IX86_BUILTIN_CVTPD2PS512,
28896 IX86_BUILTIN_CVTPD2UDQ512,
28897 IX86_BUILTIN_CVTPH2PS512,
28898 IX86_BUILTIN_CVTPS2DQ512,
28899 IX86_BUILTIN_CVTPS2PD512,
28900 IX86_BUILTIN_CVTPS2PH512,
28901 IX86_BUILTIN_CVTPS2UDQ512,
28902 IX86_BUILTIN_CVTSD2SS_ROUND,
28903 IX86_BUILTIN_CVTSI2SD64,
28904 IX86_BUILTIN_CVTSI2SS32,
28905 IX86_BUILTIN_CVTSI2SS64,
28906 IX86_BUILTIN_CVTSS2SD_ROUND,
28907 IX86_BUILTIN_CVTTPD2DQ512,
28908 IX86_BUILTIN_CVTTPD2UDQ512,
28909 IX86_BUILTIN_CVTTPS2DQ512,
28910 IX86_BUILTIN_CVTTPS2UDQ512,
28911 IX86_BUILTIN_CVTUDQ2PD512,
28912 IX86_BUILTIN_CVTUDQ2PS512,
28913 IX86_BUILTIN_CVTUSI2SD32,
28914 IX86_BUILTIN_CVTUSI2SD64,
28915 IX86_BUILTIN_CVTUSI2SS32,
28916 IX86_BUILTIN_CVTUSI2SS64,
28917 IX86_BUILTIN_DIVPD512,
28918 IX86_BUILTIN_DIVPS512,
28919 IX86_BUILTIN_DIVSD_ROUND,
28920 IX86_BUILTIN_DIVSS_ROUND,
28921 IX86_BUILTIN_EXPANDPD512,
28922 IX86_BUILTIN_EXPANDPD512Z,
28923 IX86_BUILTIN_EXPANDPDLOAD512,
28924 IX86_BUILTIN_EXPANDPDLOAD512Z,
28925 IX86_BUILTIN_EXPANDPS512,
28926 IX86_BUILTIN_EXPANDPS512Z,
28927 IX86_BUILTIN_EXPANDPSLOAD512,
28928 IX86_BUILTIN_EXPANDPSLOAD512Z,
28929 IX86_BUILTIN_EXTRACTF32X4,
28930 IX86_BUILTIN_EXTRACTF64X4,
28931 IX86_BUILTIN_EXTRACTI32X4,
28932 IX86_BUILTIN_EXTRACTI64X4,
28933 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28934 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28935 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28936 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28937 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28938 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28939 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28940 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28941 IX86_BUILTIN_GETEXPPD512,
28942 IX86_BUILTIN_GETEXPPS512,
28943 IX86_BUILTIN_GETEXPSD128,
28944 IX86_BUILTIN_GETEXPSS128,
28945 IX86_BUILTIN_GETMANTPD512,
28946 IX86_BUILTIN_GETMANTPS512,
28947 IX86_BUILTIN_GETMANTSD128,
28948 IX86_BUILTIN_GETMANTSS128,
28949 IX86_BUILTIN_INSERTF32X4,
28950 IX86_BUILTIN_INSERTF64X4,
28951 IX86_BUILTIN_INSERTI32X4,
28952 IX86_BUILTIN_INSERTI64X4,
28953 IX86_BUILTIN_LOADAPD512,
28954 IX86_BUILTIN_LOADAPS512,
28955 IX86_BUILTIN_LOADDQUDI512,
28956 IX86_BUILTIN_LOADDQUSI512,
28957 IX86_BUILTIN_LOADUPD512,
28958 IX86_BUILTIN_LOADUPS512,
28959 IX86_BUILTIN_MAXPD512,
28960 IX86_BUILTIN_MAXPS512,
28961 IX86_BUILTIN_MAXSD_ROUND,
28962 IX86_BUILTIN_MAXSS_ROUND,
28963 IX86_BUILTIN_MINPD512,
28964 IX86_BUILTIN_MINPS512,
28965 IX86_BUILTIN_MINSD_ROUND,
28966 IX86_BUILTIN_MINSS_ROUND,
28967 IX86_BUILTIN_MOVAPD512,
28968 IX86_BUILTIN_MOVAPS512,
28969 IX86_BUILTIN_MOVDDUP512,
28970 IX86_BUILTIN_MOVDQA32LOAD512,
28971 IX86_BUILTIN_MOVDQA32STORE512,
28972 IX86_BUILTIN_MOVDQA32_512,
28973 IX86_BUILTIN_MOVDQA64LOAD512,
28974 IX86_BUILTIN_MOVDQA64STORE512,
28975 IX86_BUILTIN_MOVDQA64_512,
28976 IX86_BUILTIN_MOVNTDQ512,
28977 IX86_BUILTIN_MOVNTDQA512,
28978 IX86_BUILTIN_MOVNTPD512,
28979 IX86_BUILTIN_MOVNTPS512,
28980 IX86_BUILTIN_MOVSHDUP512,
28981 IX86_BUILTIN_MOVSLDUP512,
28982 IX86_BUILTIN_MULPD512,
28983 IX86_BUILTIN_MULPS512,
28984 IX86_BUILTIN_MULSD_ROUND,
28985 IX86_BUILTIN_MULSS_ROUND,
28986 IX86_BUILTIN_PABSD512,
28987 IX86_BUILTIN_PABSQ512,
28988 IX86_BUILTIN_PADDD512,
28989 IX86_BUILTIN_PADDQ512,
28990 IX86_BUILTIN_PANDD512,
28991 IX86_BUILTIN_PANDND512,
28992 IX86_BUILTIN_PANDNQ512,
28993 IX86_BUILTIN_PANDQ512,
28994 IX86_BUILTIN_PBROADCASTD512,
28995 IX86_BUILTIN_PBROADCASTD512_GPR,
28996 IX86_BUILTIN_PBROADCASTMB512,
28997 IX86_BUILTIN_PBROADCASTMW512,
28998 IX86_BUILTIN_PBROADCASTQ512,
28999 IX86_BUILTIN_PBROADCASTQ512_GPR,
29000 IX86_BUILTIN_PCMPEQD512_MASK,
29001 IX86_BUILTIN_PCMPEQQ512_MASK,
29002 IX86_BUILTIN_PCMPGTD512_MASK,
29003 IX86_BUILTIN_PCMPGTQ512_MASK,
29004 IX86_BUILTIN_PCOMPRESSD512,
29005 IX86_BUILTIN_PCOMPRESSDSTORE512,
29006 IX86_BUILTIN_PCOMPRESSQ512,
29007 IX86_BUILTIN_PCOMPRESSQSTORE512,
29008 IX86_BUILTIN_PEXPANDD512,
29009 IX86_BUILTIN_PEXPANDD512Z,
29010 IX86_BUILTIN_PEXPANDDLOAD512,
29011 IX86_BUILTIN_PEXPANDDLOAD512Z,
29012 IX86_BUILTIN_PEXPANDQ512,
29013 IX86_BUILTIN_PEXPANDQ512Z,
29014 IX86_BUILTIN_PEXPANDQLOAD512,
29015 IX86_BUILTIN_PEXPANDQLOAD512Z,
29016 IX86_BUILTIN_PMAXSD512,
29017 IX86_BUILTIN_PMAXSQ512,
29018 IX86_BUILTIN_PMAXUD512,
29019 IX86_BUILTIN_PMAXUQ512,
29020 IX86_BUILTIN_PMINSD512,
29021 IX86_BUILTIN_PMINSQ512,
29022 IX86_BUILTIN_PMINUD512,
29023 IX86_BUILTIN_PMINUQ512,
29024 IX86_BUILTIN_PMOVDB512,
29025 IX86_BUILTIN_PMOVDB512_MEM,
29026 IX86_BUILTIN_PMOVDW512,
29027 IX86_BUILTIN_PMOVDW512_MEM,
29028 IX86_BUILTIN_PMOVQB512,
29029 IX86_BUILTIN_PMOVQB512_MEM,
29030 IX86_BUILTIN_PMOVQD512,
29031 IX86_BUILTIN_PMOVQD512_MEM,
29032 IX86_BUILTIN_PMOVQW512,
29033 IX86_BUILTIN_PMOVQW512_MEM,
29034 IX86_BUILTIN_PMOVSDB512,
29035 IX86_BUILTIN_PMOVSDB512_MEM,
29036 IX86_BUILTIN_PMOVSDW512,
29037 IX86_BUILTIN_PMOVSDW512_MEM,
29038 IX86_BUILTIN_PMOVSQB512,
29039 IX86_BUILTIN_PMOVSQB512_MEM,
29040 IX86_BUILTIN_PMOVSQD512,
29041 IX86_BUILTIN_PMOVSQD512_MEM,
29042 IX86_BUILTIN_PMOVSQW512,
29043 IX86_BUILTIN_PMOVSQW512_MEM,
29044 IX86_BUILTIN_PMOVSXBD512,
29045 IX86_BUILTIN_PMOVSXBQ512,
29046 IX86_BUILTIN_PMOVSXDQ512,
29047 IX86_BUILTIN_PMOVSXWD512,
29048 IX86_BUILTIN_PMOVSXWQ512,
29049 IX86_BUILTIN_PMOVUSDB512,
29050 IX86_BUILTIN_PMOVUSDB512_MEM,
29051 IX86_BUILTIN_PMOVUSDW512,
29052 IX86_BUILTIN_PMOVUSDW512_MEM,
29053 IX86_BUILTIN_PMOVUSQB512,
29054 IX86_BUILTIN_PMOVUSQB512_MEM,
29055 IX86_BUILTIN_PMOVUSQD512,
29056 IX86_BUILTIN_PMOVUSQD512_MEM,
29057 IX86_BUILTIN_PMOVUSQW512,
29058 IX86_BUILTIN_PMOVUSQW512_MEM,
29059 IX86_BUILTIN_PMOVZXBD512,
29060 IX86_BUILTIN_PMOVZXBQ512,
29061 IX86_BUILTIN_PMOVZXDQ512,
29062 IX86_BUILTIN_PMOVZXWD512,
29063 IX86_BUILTIN_PMOVZXWQ512,
29064 IX86_BUILTIN_PMULDQ512,
29065 IX86_BUILTIN_PMULLD512,
29066 IX86_BUILTIN_PMULUDQ512,
29067 IX86_BUILTIN_PORD512,
29068 IX86_BUILTIN_PORQ512,
29069 IX86_BUILTIN_PROLD512,
29070 IX86_BUILTIN_PROLQ512,
29071 IX86_BUILTIN_PROLVD512,
29072 IX86_BUILTIN_PROLVQ512,
29073 IX86_BUILTIN_PRORD512,
29074 IX86_BUILTIN_PRORQ512,
29075 IX86_BUILTIN_PRORVD512,
29076 IX86_BUILTIN_PRORVQ512,
29077 IX86_BUILTIN_PSHUFD512,
29078 IX86_BUILTIN_PSLLD512,
29079 IX86_BUILTIN_PSLLDI512,
29080 IX86_BUILTIN_PSLLQ512,
29081 IX86_BUILTIN_PSLLQI512,
29082 IX86_BUILTIN_PSLLVV16SI,
29083 IX86_BUILTIN_PSLLVV8DI,
29084 IX86_BUILTIN_PSRAD512,
29085 IX86_BUILTIN_PSRADI512,
29086 IX86_BUILTIN_PSRAQ512,
29087 IX86_BUILTIN_PSRAQI512,
29088 IX86_BUILTIN_PSRAVV16SI,
29089 IX86_BUILTIN_PSRAVV8DI,
29090 IX86_BUILTIN_PSRLD512,
29091 IX86_BUILTIN_PSRLDI512,
29092 IX86_BUILTIN_PSRLQ512,
29093 IX86_BUILTIN_PSRLQI512,
29094 IX86_BUILTIN_PSRLVV16SI,
29095 IX86_BUILTIN_PSRLVV8DI,
29096 IX86_BUILTIN_PSUBD512,
29097 IX86_BUILTIN_PSUBQ512,
29098 IX86_BUILTIN_PTESTMD512,
29099 IX86_BUILTIN_PTESTMQ512,
29100 IX86_BUILTIN_PTESTNMD512,
29101 IX86_BUILTIN_PTESTNMQ512,
29102 IX86_BUILTIN_PUNPCKHDQ512,
29103 IX86_BUILTIN_PUNPCKHQDQ512,
29104 IX86_BUILTIN_PUNPCKLDQ512,
29105 IX86_BUILTIN_PUNPCKLQDQ512,
29106 IX86_BUILTIN_PXORD512,
29107 IX86_BUILTIN_PXORQ512,
29108 IX86_BUILTIN_RCP14PD512,
29109 IX86_BUILTIN_RCP14PS512,
29110 IX86_BUILTIN_RCP14SD,
29111 IX86_BUILTIN_RCP14SS,
29112 IX86_BUILTIN_RNDSCALEPD,
29113 IX86_BUILTIN_RNDSCALEPS,
29114 IX86_BUILTIN_RNDSCALESD,
29115 IX86_BUILTIN_RNDSCALESS,
29116 IX86_BUILTIN_RSQRT14PD512,
29117 IX86_BUILTIN_RSQRT14PS512,
29118 IX86_BUILTIN_RSQRT14SD,
29119 IX86_BUILTIN_RSQRT14SS,
29120 IX86_BUILTIN_SCALEFPD512,
29121 IX86_BUILTIN_SCALEFPS512,
29122 IX86_BUILTIN_SCALEFSD,
29123 IX86_BUILTIN_SCALEFSS,
29124 IX86_BUILTIN_SHUFPD512,
29125 IX86_BUILTIN_SHUFPS512,
29126 IX86_BUILTIN_SHUF_F32x4,
29127 IX86_BUILTIN_SHUF_F64x2,
29128 IX86_BUILTIN_SHUF_I32x4,
29129 IX86_BUILTIN_SHUF_I64x2,
29130 IX86_BUILTIN_SQRTPD512,
29131 IX86_BUILTIN_SQRTPD512_MASK,
29132 IX86_BUILTIN_SQRTPS512_MASK,
29133 IX86_BUILTIN_SQRTPS_NR512,
29134 IX86_BUILTIN_SQRTSD_ROUND,
29135 IX86_BUILTIN_SQRTSS_ROUND,
29136 IX86_BUILTIN_STOREAPD512,
29137 IX86_BUILTIN_STOREAPS512,
29138 IX86_BUILTIN_STOREDQUDI512,
29139 IX86_BUILTIN_STOREDQUSI512,
29140 IX86_BUILTIN_STOREUPD512,
29141 IX86_BUILTIN_STOREUPS512,
29142 IX86_BUILTIN_SUBPD512,
29143 IX86_BUILTIN_SUBPS512,
29144 IX86_BUILTIN_SUBSD_ROUND,
29145 IX86_BUILTIN_SUBSS_ROUND,
29146 IX86_BUILTIN_UCMPD512,
29147 IX86_BUILTIN_UCMPQ512,
29148 IX86_BUILTIN_UNPCKHPD512,
29149 IX86_BUILTIN_UNPCKHPS512,
29150 IX86_BUILTIN_UNPCKLPD512,
29151 IX86_BUILTIN_UNPCKLPS512,
29152 IX86_BUILTIN_VCVTSD2SI32,
29153 IX86_BUILTIN_VCVTSD2SI64,
29154 IX86_BUILTIN_VCVTSD2USI32,
29155 IX86_BUILTIN_VCVTSD2USI64,
29156 IX86_BUILTIN_VCVTSS2SI32,
29157 IX86_BUILTIN_VCVTSS2SI64,
29158 IX86_BUILTIN_VCVTSS2USI32,
29159 IX86_BUILTIN_VCVTSS2USI64,
29160 IX86_BUILTIN_VCVTTSD2SI32,
29161 IX86_BUILTIN_VCVTTSD2SI64,
29162 IX86_BUILTIN_VCVTTSD2USI32,
29163 IX86_BUILTIN_VCVTTSD2USI64,
29164 IX86_BUILTIN_VCVTTSS2SI32,
29165 IX86_BUILTIN_VCVTTSS2SI64,
29166 IX86_BUILTIN_VCVTTSS2USI32,
29167 IX86_BUILTIN_VCVTTSS2USI64,
29168 IX86_BUILTIN_VFMADDPD512_MASK,
29169 IX86_BUILTIN_VFMADDPD512_MASK3,
29170 IX86_BUILTIN_VFMADDPD512_MASKZ,
29171 IX86_BUILTIN_VFMADDPS512_MASK,
29172 IX86_BUILTIN_VFMADDPS512_MASK3,
29173 IX86_BUILTIN_VFMADDPS512_MASKZ,
29174 IX86_BUILTIN_VFMADDSD3_ROUND,
29175 IX86_BUILTIN_VFMADDSS3_ROUND,
29176 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29177 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29178 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29179 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29180 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29181 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29182 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29183 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29184 IX86_BUILTIN_VFMSUBPD512_MASK3,
29185 IX86_BUILTIN_VFMSUBPS512_MASK3,
29186 IX86_BUILTIN_VFMSUBSD3_MASK3,
29187 IX86_BUILTIN_VFMSUBSS3_MASK3,
29188 IX86_BUILTIN_VFNMADDPD512_MASK,
29189 IX86_BUILTIN_VFNMADDPS512_MASK,
29190 IX86_BUILTIN_VFNMSUBPD512_MASK,
29191 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29192 IX86_BUILTIN_VFNMSUBPS512_MASK,
29193 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29194 IX86_BUILTIN_VPCLZCNTD512,
29195 IX86_BUILTIN_VPCLZCNTQ512,
29196 IX86_BUILTIN_VPCONFLICTD512,
29197 IX86_BUILTIN_VPCONFLICTQ512,
29198 IX86_BUILTIN_VPERMDF512,
29199 IX86_BUILTIN_VPERMDI512,
29200 IX86_BUILTIN_VPERMI2VARD512,
29201 IX86_BUILTIN_VPERMI2VARPD512,
29202 IX86_BUILTIN_VPERMI2VARPS512,
29203 IX86_BUILTIN_VPERMI2VARQ512,
29204 IX86_BUILTIN_VPERMILPD512,
29205 IX86_BUILTIN_VPERMILPS512,
29206 IX86_BUILTIN_VPERMILVARPD512,
29207 IX86_BUILTIN_VPERMILVARPS512,
29208 IX86_BUILTIN_VPERMT2VARD512,
29209 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29210 IX86_BUILTIN_VPERMT2VARPD512,
29211 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29212 IX86_BUILTIN_VPERMT2VARPS512,
29213 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29214 IX86_BUILTIN_VPERMT2VARQ512,
29215 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29216 IX86_BUILTIN_VPERMVARDF512,
29217 IX86_BUILTIN_VPERMVARDI512,
29218 IX86_BUILTIN_VPERMVARSF512,
29219 IX86_BUILTIN_VPERMVARSI512,
29220 IX86_BUILTIN_VTERNLOGD512_MASK,
29221 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29222 IX86_BUILTIN_VTERNLOGQ512_MASK,
29223 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29225 /* Mask arithmetic operations. */
29226 IX86_BUILTIN_KAND16,
29227 IX86_BUILTIN_KANDN16,
29228 IX86_BUILTIN_KNOT16,
29229 IX86_BUILTIN_KOR16,
29230 IX86_BUILTIN_KORTESTC16,
29231 IX86_BUILTIN_KORTESTZ16,
29232 IX86_BUILTIN_KUNPCKBW,
29233 IX86_BUILTIN_KXNOR16,
29234 IX86_BUILTIN_KXOR16,
29235 IX86_BUILTIN_KMOV16,
29237 /* AVX512VL. */
29238 IX86_BUILTIN_PMOVUSQD256_MEM,
29239 IX86_BUILTIN_PMOVUSQD128_MEM,
29240 IX86_BUILTIN_PMOVSQD256_MEM,
29241 IX86_BUILTIN_PMOVSQD128_MEM,
29242 IX86_BUILTIN_PMOVQD256_MEM,
29243 IX86_BUILTIN_PMOVQD128_MEM,
29244 IX86_BUILTIN_PMOVUSQW256_MEM,
29245 IX86_BUILTIN_PMOVUSQW128_MEM,
29246 IX86_BUILTIN_PMOVSQW256_MEM,
29247 IX86_BUILTIN_PMOVSQW128_MEM,
29248 IX86_BUILTIN_PMOVQW256_MEM,
29249 IX86_BUILTIN_PMOVQW128_MEM,
29250 IX86_BUILTIN_PMOVUSQB256_MEM,
29251 IX86_BUILTIN_PMOVUSQB128_MEM,
29252 IX86_BUILTIN_PMOVSQB256_MEM,
29253 IX86_BUILTIN_PMOVSQB128_MEM,
29254 IX86_BUILTIN_PMOVQB256_MEM,
29255 IX86_BUILTIN_PMOVQB128_MEM,
29256 IX86_BUILTIN_PMOVUSDW256_MEM,
29257 IX86_BUILTIN_PMOVUSDW128_MEM,
29258 IX86_BUILTIN_PMOVSDW256_MEM,
29259 IX86_BUILTIN_PMOVSDW128_MEM,
29260 IX86_BUILTIN_PMOVDW256_MEM,
29261 IX86_BUILTIN_PMOVDW128_MEM,
29262 IX86_BUILTIN_PMOVUSDB256_MEM,
29263 IX86_BUILTIN_PMOVUSDB128_MEM,
29264 IX86_BUILTIN_PMOVSDB256_MEM,
29265 IX86_BUILTIN_PMOVSDB128_MEM,
29266 IX86_BUILTIN_PMOVDB256_MEM,
29267 IX86_BUILTIN_PMOVDB128_MEM,
29268 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29269 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29270 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29271 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29272 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29273 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29274 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29275 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29276 IX86_BUILTIN_LOADAPD256_MASK,
29277 IX86_BUILTIN_LOADAPD128_MASK,
29278 IX86_BUILTIN_LOADAPS256_MASK,
29279 IX86_BUILTIN_LOADAPS128_MASK,
29280 IX86_BUILTIN_STOREAPD256_MASK,
29281 IX86_BUILTIN_STOREAPD128_MASK,
29282 IX86_BUILTIN_STOREAPS256_MASK,
29283 IX86_BUILTIN_STOREAPS128_MASK,
29284 IX86_BUILTIN_LOADUPD256_MASK,
29285 IX86_BUILTIN_LOADUPD128_MASK,
29286 IX86_BUILTIN_LOADUPS256_MASK,
29287 IX86_BUILTIN_LOADUPS128_MASK,
29288 IX86_BUILTIN_STOREUPD256_MASK,
29289 IX86_BUILTIN_STOREUPD128_MASK,
29290 IX86_BUILTIN_STOREUPS256_MASK,
29291 IX86_BUILTIN_STOREUPS128_MASK,
29292 IX86_BUILTIN_LOADDQUDI256_MASK,
29293 IX86_BUILTIN_LOADDQUDI128_MASK,
29294 IX86_BUILTIN_LOADDQUSI256_MASK,
29295 IX86_BUILTIN_LOADDQUSI128_MASK,
29296 IX86_BUILTIN_LOADDQUHI256_MASK,
29297 IX86_BUILTIN_LOADDQUHI128_MASK,
29298 IX86_BUILTIN_LOADDQUQI256_MASK,
29299 IX86_BUILTIN_LOADDQUQI128_MASK,
29300 IX86_BUILTIN_STOREDQUDI256_MASK,
29301 IX86_BUILTIN_STOREDQUDI128_MASK,
29302 IX86_BUILTIN_STOREDQUSI256_MASK,
29303 IX86_BUILTIN_STOREDQUSI128_MASK,
29304 IX86_BUILTIN_STOREDQUHI256_MASK,
29305 IX86_BUILTIN_STOREDQUHI128_MASK,
29306 IX86_BUILTIN_STOREDQUQI256_MASK,
29307 IX86_BUILTIN_STOREDQUQI128_MASK,
29308 IX86_BUILTIN_COMPRESSPDSTORE256,
29309 IX86_BUILTIN_COMPRESSPDSTORE128,
29310 IX86_BUILTIN_COMPRESSPSSTORE256,
29311 IX86_BUILTIN_COMPRESSPSSTORE128,
29312 IX86_BUILTIN_PCOMPRESSQSTORE256,
29313 IX86_BUILTIN_PCOMPRESSQSTORE128,
29314 IX86_BUILTIN_PCOMPRESSDSTORE256,
29315 IX86_BUILTIN_PCOMPRESSDSTORE128,
29316 IX86_BUILTIN_EXPANDPDLOAD256,
29317 IX86_BUILTIN_EXPANDPDLOAD128,
29318 IX86_BUILTIN_EXPANDPSLOAD256,
29319 IX86_BUILTIN_EXPANDPSLOAD128,
29320 IX86_BUILTIN_PEXPANDQLOAD256,
29321 IX86_BUILTIN_PEXPANDQLOAD128,
29322 IX86_BUILTIN_PEXPANDDLOAD256,
29323 IX86_BUILTIN_PEXPANDDLOAD128,
29324 IX86_BUILTIN_EXPANDPDLOAD256Z,
29325 IX86_BUILTIN_EXPANDPDLOAD128Z,
29326 IX86_BUILTIN_EXPANDPSLOAD256Z,
29327 IX86_BUILTIN_EXPANDPSLOAD128Z,
29328 IX86_BUILTIN_PEXPANDQLOAD256Z,
29329 IX86_BUILTIN_PEXPANDQLOAD128Z,
29330 IX86_BUILTIN_PEXPANDDLOAD256Z,
29331 IX86_BUILTIN_PEXPANDDLOAD128Z,
29332 IX86_BUILTIN_PALIGNR256_MASK,
29333 IX86_BUILTIN_PALIGNR128_MASK,
29334 IX86_BUILTIN_MOVDQA64_256_MASK,
29335 IX86_BUILTIN_MOVDQA64_128_MASK,
29336 IX86_BUILTIN_MOVDQA32_256_MASK,
29337 IX86_BUILTIN_MOVDQA32_128_MASK,
29338 IX86_BUILTIN_MOVAPD256_MASK,
29339 IX86_BUILTIN_MOVAPD128_MASK,
29340 IX86_BUILTIN_MOVAPS256_MASK,
29341 IX86_BUILTIN_MOVAPS128_MASK,
29342 IX86_BUILTIN_MOVDQUHI256_MASK,
29343 IX86_BUILTIN_MOVDQUHI128_MASK,
29344 IX86_BUILTIN_MOVDQUQI256_MASK,
29345 IX86_BUILTIN_MOVDQUQI128_MASK,
29346 IX86_BUILTIN_MINPS128_MASK,
29347 IX86_BUILTIN_MAXPS128_MASK,
29348 IX86_BUILTIN_MINPD128_MASK,
29349 IX86_BUILTIN_MAXPD128_MASK,
29350 IX86_BUILTIN_MAXPD256_MASK,
29351 IX86_BUILTIN_MAXPS256_MASK,
29352 IX86_BUILTIN_MINPD256_MASK,
29353 IX86_BUILTIN_MINPS256_MASK,
29354 IX86_BUILTIN_MULPS128_MASK,
29355 IX86_BUILTIN_DIVPS128_MASK,
29356 IX86_BUILTIN_MULPD128_MASK,
29357 IX86_BUILTIN_DIVPD128_MASK,
29358 IX86_BUILTIN_DIVPD256_MASK,
29359 IX86_BUILTIN_DIVPS256_MASK,
29360 IX86_BUILTIN_MULPD256_MASK,
29361 IX86_BUILTIN_MULPS256_MASK,
29362 IX86_BUILTIN_ADDPD128_MASK,
29363 IX86_BUILTIN_ADDPD256_MASK,
29364 IX86_BUILTIN_ADDPS128_MASK,
29365 IX86_BUILTIN_ADDPS256_MASK,
29366 IX86_BUILTIN_SUBPD128_MASK,
29367 IX86_BUILTIN_SUBPD256_MASK,
29368 IX86_BUILTIN_SUBPS128_MASK,
29369 IX86_BUILTIN_SUBPS256_MASK,
29370 IX86_BUILTIN_XORPD256_MASK,
29371 IX86_BUILTIN_XORPD128_MASK,
29372 IX86_BUILTIN_XORPS256_MASK,
29373 IX86_BUILTIN_XORPS128_MASK,
29374 IX86_BUILTIN_ORPD256_MASK,
29375 IX86_BUILTIN_ORPD128_MASK,
29376 IX86_BUILTIN_ORPS256_MASK,
29377 IX86_BUILTIN_ORPS128_MASK,
29378 IX86_BUILTIN_BROADCASTF32x2_256,
29379 IX86_BUILTIN_BROADCASTI32x2_256,
29380 IX86_BUILTIN_BROADCASTI32x2_128,
29381 IX86_BUILTIN_BROADCASTF64X2_256,
29382 IX86_BUILTIN_BROADCASTI64X2_256,
29383 IX86_BUILTIN_BROADCASTF32X4_256,
29384 IX86_BUILTIN_BROADCASTI32X4_256,
29385 IX86_BUILTIN_EXTRACTF32X4_256,
29386 IX86_BUILTIN_EXTRACTI32X4_256,
29387 IX86_BUILTIN_DBPSADBW256,
29388 IX86_BUILTIN_DBPSADBW128,
29389 IX86_BUILTIN_CVTTPD2QQ256,
29390 IX86_BUILTIN_CVTTPD2QQ128,
29391 IX86_BUILTIN_CVTTPD2UQQ256,
29392 IX86_BUILTIN_CVTTPD2UQQ128,
29393 IX86_BUILTIN_CVTPD2QQ256,
29394 IX86_BUILTIN_CVTPD2QQ128,
29395 IX86_BUILTIN_CVTPD2UQQ256,
29396 IX86_BUILTIN_CVTPD2UQQ128,
29397 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29398 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29399 IX86_BUILTIN_CVTTPS2QQ256,
29400 IX86_BUILTIN_CVTTPS2QQ128,
29401 IX86_BUILTIN_CVTTPS2UQQ256,
29402 IX86_BUILTIN_CVTTPS2UQQ128,
29403 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29404 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29405 IX86_BUILTIN_CVTTPS2UDQ256,
29406 IX86_BUILTIN_CVTTPS2UDQ128,
29407 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29408 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29409 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29410 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29411 IX86_BUILTIN_CVTPD2DQ256_MASK,
29412 IX86_BUILTIN_CVTPD2DQ128_MASK,
29413 IX86_BUILTIN_CVTDQ2PD256_MASK,
29414 IX86_BUILTIN_CVTDQ2PD128_MASK,
29415 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29416 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29417 IX86_BUILTIN_CVTDQ2PS256_MASK,
29418 IX86_BUILTIN_CVTDQ2PS128_MASK,
29419 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29420 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29421 IX86_BUILTIN_CVTPS2PD256_MASK,
29422 IX86_BUILTIN_CVTPS2PD128_MASK,
29423 IX86_BUILTIN_PBROADCASTB256_MASK,
29424 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29425 IX86_BUILTIN_PBROADCASTB128_MASK,
29426 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29427 IX86_BUILTIN_PBROADCASTW256_MASK,
29428 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29429 IX86_BUILTIN_PBROADCASTW128_MASK,
29430 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29431 IX86_BUILTIN_PBROADCASTD256_MASK,
29432 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29433 IX86_BUILTIN_PBROADCASTD128_MASK,
29434 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29435 IX86_BUILTIN_PBROADCASTQ256_MASK,
29436 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29437 IX86_BUILTIN_PBROADCASTQ128_MASK,
29438 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29439 IX86_BUILTIN_BROADCASTSS256,
29440 IX86_BUILTIN_BROADCASTSS128,
29441 IX86_BUILTIN_BROADCASTSD256,
29442 IX86_BUILTIN_EXTRACTF64X2_256,
29443 IX86_BUILTIN_EXTRACTI64X2_256,
29444 IX86_BUILTIN_INSERTF32X4_256,
29445 IX86_BUILTIN_INSERTI32X4_256,
29446 IX86_BUILTIN_PMOVSXBW256_MASK,
29447 IX86_BUILTIN_PMOVSXBW128_MASK,
29448 IX86_BUILTIN_PMOVSXBD256_MASK,
29449 IX86_BUILTIN_PMOVSXBD128_MASK,
29450 IX86_BUILTIN_PMOVSXBQ256_MASK,
29451 IX86_BUILTIN_PMOVSXBQ128_MASK,
29452 IX86_BUILTIN_PMOVSXWD256_MASK,
29453 IX86_BUILTIN_PMOVSXWD128_MASK,
29454 IX86_BUILTIN_PMOVSXWQ256_MASK,
29455 IX86_BUILTIN_PMOVSXWQ128_MASK,
29456 IX86_BUILTIN_PMOVSXDQ256_MASK,
29457 IX86_BUILTIN_PMOVSXDQ128_MASK,
29458 IX86_BUILTIN_PMOVZXBW256_MASK,
29459 IX86_BUILTIN_PMOVZXBW128_MASK,
29460 IX86_BUILTIN_PMOVZXBD256_MASK,
29461 IX86_BUILTIN_PMOVZXBD128_MASK,
29462 IX86_BUILTIN_PMOVZXBQ256_MASK,
29463 IX86_BUILTIN_PMOVZXBQ128_MASK,
29464 IX86_BUILTIN_PMOVZXWD256_MASK,
29465 IX86_BUILTIN_PMOVZXWD128_MASK,
29466 IX86_BUILTIN_PMOVZXWQ256_MASK,
29467 IX86_BUILTIN_PMOVZXWQ128_MASK,
29468 IX86_BUILTIN_PMOVZXDQ256_MASK,
29469 IX86_BUILTIN_PMOVZXDQ128_MASK,
29470 IX86_BUILTIN_REDUCEPD256_MASK,
29471 IX86_BUILTIN_REDUCEPD128_MASK,
29472 IX86_BUILTIN_REDUCEPS256_MASK,
29473 IX86_BUILTIN_REDUCEPS128_MASK,
29474 IX86_BUILTIN_REDUCESD_MASK,
29475 IX86_BUILTIN_REDUCESS_MASK,
29476 IX86_BUILTIN_VPERMVARHI256_MASK,
29477 IX86_BUILTIN_VPERMVARHI128_MASK,
29478 IX86_BUILTIN_VPERMT2VARHI256,
29479 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29480 IX86_BUILTIN_VPERMT2VARHI128,
29481 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29482 IX86_BUILTIN_VPERMI2VARHI256,
29483 IX86_BUILTIN_VPERMI2VARHI128,
29484 IX86_BUILTIN_RCP14PD256,
29485 IX86_BUILTIN_RCP14PD128,
29486 IX86_BUILTIN_RCP14PS256,
29487 IX86_BUILTIN_RCP14PS128,
29488 IX86_BUILTIN_RSQRT14PD256_MASK,
29489 IX86_BUILTIN_RSQRT14PD128_MASK,
29490 IX86_BUILTIN_RSQRT14PS256_MASK,
29491 IX86_BUILTIN_RSQRT14PS128_MASK,
29492 IX86_BUILTIN_SQRTPD256_MASK,
29493 IX86_BUILTIN_SQRTPD128_MASK,
29494 IX86_BUILTIN_SQRTPS256_MASK,
29495 IX86_BUILTIN_SQRTPS128_MASK,
29496 IX86_BUILTIN_PADDB128_MASK,
29497 IX86_BUILTIN_PADDW128_MASK,
29498 IX86_BUILTIN_PADDD128_MASK,
29499 IX86_BUILTIN_PADDQ128_MASK,
29500 IX86_BUILTIN_PSUBB128_MASK,
29501 IX86_BUILTIN_PSUBW128_MASK,
29502 IX86_BUILTIN_PSUBD128_MASK,
29503 IX86_BUILTIN_PSUBQ128_MASK,
29504 IX86_BUILTIN_PADDSB128_MASK,
29505 IX86_BUILTIN_PADDSW128_MASK,
29506 IX86_BUILTIN_PSUBSB128_MASK,
29507 IX86_BUILTIN_PSUBSW128_MASK,
29508 IX86_BUILTIN_PADDUSB128_MASK,
29509 IX86_BUILTIN_PADDUSW128_MASK,
29510 IX86_BUILTIN_PSUBUSB128_MASK,
29511 IX86_BUILTIN_PSUBUSW128_MASK,
29512 IX86_BUILTIN_PADDB256_MASK,
29513 IX86_BUILTIN_PADDW256_MASK,
29514 IX86_BUILTIN_PADDD256_MASK,
29515 IX86_BUILTIN_PADDQ256_MASK,
29516 IX86_BUILTIN_PADDSB256_MASK,
29517 IX86_BUILTIN_PADDSW256_MASK,
29518 IX86_BUILTIN_PADDUSB256_MASK,
29519 IX86_BUILTIN_PADDUSW256_MASK,
29520 IX86_BUILTIN_PSUBB256_MASK,
29521 IX86_BUILTIN_PSUBW256_MASK,
29522 IX86_BUILTIN_PSUBD256_MASK,
29523 IX86_BUILTIN_PSUBQ256_MASK,
29524 IX86_BUILTIN_PSUBSB256_MASK,
29525 IX86_BUILTIN_PSUBSW256_MASK,
29526 IX86_BUILTIN_PSUBUSB256_MASK,
29527 IX86_BUILTIN_PSUBUSW256_MASK,
29528 IX86_BUILTIN_SHUF_F64x2_256,
29529 IX86_BUILTIN_SHUF_I64x2_256,
29530 IX86_BUILTIN_SHUF_I32x4_256,
29531 IX86_BUILTIN_SHUF_F32x4_256,
29532 IX86_BUILTIN_PMOVWB128,
29533 IX86_BUILTIN_PMOVWB256,
29534 IX86_BUILTIN_PMOVSWB128,
29535 IX86_BUILTIN_PMOVSWB256,
29536 IX86_BUILTIN_PMOVUSWB128,
29537 IX86_BUILTIN_PMOVUSWB256,
29538 IX86_BUILTIN_PMOVDB128,
29539 IX86_BUILTIN_PMOVDB256,
29540 IX86_BUILTIN_PMOVSDB128,
29541 IX86_BUILTIN_PMOVSDB256,
29542 IX86_BUILTIN_PMOVUSDB128,
29543 IX86_BUILTIN_PMOVUSDB256,
29544 IX86_BUILTIN_PMOVDW128,
29545 IX86_BUILTIN_PMOVDW256,
29546 IX86_BUILTIN_PMOVSDW128,
29547 IX86_BUILTIN_PMOVSDW256,
29548 IX86_BUILTIN_PMOVUSDW128,
29549 IX86_BUILTIN_PMOVUSDW256,
29550 IX86_BUILTIN_PMOVQB128,
29551 IX86_BUILTIN_PMOVQB256,
29552 IX86_BUILTIN_PMOVSQB128,
29553 IX86_BUILTIN_PMOVSQB256,
29554 IX86_BUILTIN_PMOVUSQB128,
29555 IX86_BUILTIN_PMOVUSQB256,
29556 IX86_BUILTIN_PMOVQW128,
29557 IX86_BUILTIN_PMOVQW256,
29558 IX86_BUILTIN_PMOVSQW128,
29559 IX86_BUILTIN_PMOVSQW256,
29560 IX86_BUILTIN_PMOVUSQW128,
29561 IX86_BUILTIN_PMOVUSQW256,
29562 IX86_BUILTIN_PMOVQD128,
29563 IX86_BUILTIN_PMOVQD256,
29564 IX86_BUILTIN_PMOVSQD128,
29565 IX86_BUILTIN_PMOVSQD256,
29566 IX86_BUILTIN_PMOVUSQD128,
29567 IX86_BUILTIN_PMOVUSQD256,
29568 IX86_BUILTIN_RANGEPD256,
29569 IX86_BUILTIN_RANGEPD128,
29570 IX86_BUILTIN_RANGEPS256,
29571 IX86_BUILTIN_RANGEPS128,
29572 IX86_BUILTIN_GETEXPPS256,
29573 IX86_BUILTIN_GETEXPPD256,
29574 IX86_BUILTIN_GETEXPPS128,
29575 IX86_BUILTIN_GETEXPPD128,
29576 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29577 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29578 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29579 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29580 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29581 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29582 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29583 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29584 IX86_BUILTIN_PABSQ256,
29585 IX86_BUILTIN_PABSQ128,
29586 IX86_BUILTIN_PABSD256_MASK,
29587 IX86_BUILTIN_PABSD128_MASK,
29588 IX86_BUILTIN_PMULHRSW256_MASK,
29589 IX86_BUILTIN_PMULHRSW128_MASK,
29590 IX86_BUILTIN_PMULHUW128_MASK,
29591 IX86_BUILTIN_PMULHUW256_MASK,
29592 IX86_BUILTIN_PMULHW256_MASK,
29593 IX86_BUILTIN_PMULHW128_MASK,
29594 IX86_BUILTIN_PMULLW256_MASK,
29595 IX86_BUILTIN_PMULLW128_MASK,
29596 IX86_BUILTIN_PMULLQ256,
29597 IX86_BUILTIN_PMULLQ128,
29598 IX86_BUILTIN_ANDPD256_MASK,
29599 IX86_BUILTIN_ANDPD128_MASK,
29600 IX86_BUILTIN_ANDPS256_MASK,
29601 IX86_BUILTIN_ANDPS128_MASK,
29602 IX86_BUILTIN_ANDNPD256_MASK,
29603 IX86_BUILTIN_ANDNPD128_MASK,
29604 IX86_BUILTIN_ANDNPS256_MASK,
29605 IX86_BUILTIN_ANDNPS128_MASK,
29606 IX86_BUILTIN_PSLLWI128_MASK,
29607 IX86_BUILTIN_PSLLDI128_MASK,
29608 IX86_BUILTIN_PSLLQI128_MASK,
29609 IX86_BUILTIN_PSLLW128_MASK,
29610 IX86_BUILTIN_PSLLD128_MASK,
29611 IX86_BUILTIN_PSLLQ128_MASK,
29612 IX86_BUILTIN_PSLLWI256_MASK,
29613 IX86_BUILTIN_PSLLW256_MASK,
29614 IX86_BUILTIN_PSLLDI256_MASK,
29615 IX86_BUILTIN_PSLLD256_MASK,
29616 IX86_BUILTIN_PSLLQI256_MASK,
29617 IX86_BUILTIN_PSLLQ256_MASK,
29618 IX86_BUILTIN_PSRADI128_MASK,
29619 IX86_BUILTIN_PSRAD128_MASK,
29620 IX86_BUILTIN_PSRADI256_MASK,
29621 IX86_BUILTIN_PSRAD256_MASK,
29622 IX86_BUILTIN_PSRAQI128_MASK,
29623 IX86_BUILTIN_PSRAQ128_MASK,
29624 IX86_BUILTIN_PSRAQI256_MASK,
29625 IX86_BUILTIN_PSRAQ256_MASK,
29626 IX86_BUILTIN_PANDD256,
29627 IX86_BUILTIN_PANDD128,
29628 IX86_BUILTIN_PSRLDI128_MASK,
29629 IX86_BUILTIN_PSRLD128_MASK,
29630 IX86_BUILTIN_PSRLDI256_MASK,
29631 IX86_BUILTIN_PSRLD256_MASK,
29632 IX86_BUILTIN_PSRLQI128_MASK,
29633 IX86_BUILTIN_PSRLQ128_MASK,
29634 IX86_BUILTIN_PSRLQI256_MASK,
29635 IX86_BUILTIN_PSRLQ256_MASK,
29636 IX86_BUILTIN_PANDQ256,
29637 IX86_BUILTIN_PANDQ128,
29638 IX86_BUILTIN_PANDND256,
29639 IX86_BUILTIN_PANDND128,
29640 IX86_BUILTIN_PANDNQ256,
29641 IX86_BUILTIN_PANDNQ128,
29642 IX86_BUILTIN_PORD256,
29643 IX86_BUILTIN_PORD128,
29644 IX86_BUILTIN_PORQ256,
29645 IX86_BUILTIN_PORQ128,
29646 IX86_BUILTIN_PXORD256,
29647 IX86_BUILTIN_PXORD128,
29648 IX86_BUILTIN_PXORQ256,
29649 IX86_BUILTIN_PXORQ128,
29650 IX86_BUILTIN_PACKSSWB256_MASK,
29651 IX86_BUILTIN_PACKSSWB128_MASK,
29652 IX86_BUILTIN_PACKUSWB256_MASK,
29653 IX86_BUILTIN_PACKUSWB128_MASK,
29654 IX86_BUILTIN_RNDSCALEPS256,
29655 IX86_BUILTIN_RNDSCALEPD256,
29656 IX86_BUILTIN_RNDSCALEPS128,
29657 IX86_BUILTIN_RNDSCALEPD128,
29658 IX86_BUILTIN_VTERNLOGQ256_MASK,
29659 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29660 IX86_BUILTIN_VTERNLOGD256_MASK,
29661 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29662 IX86_BUILTIN_VTERNLOGQ128_MASK,
29663 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29664 IX86_BUILTIN_VTERNLOGD128_MASK,
29665 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29666 IX86_BUILTIN_SCALEFPD256,
29667 IX86_BUILTIN_SCALEFPS256,
29668 IX86_BUILTIN_SCALEFPD128,
29669 IX86_BUILTIN_SCALEFPS128,
29670 IX86_BUILTIN_VFMADDPD256_MASK,
29671 IX86_BUILTIN_VFMADDPD256_MASK3,
29672 IX86_BUILTIN_VFMADDPD256_MASKZ,
29673 IX86_BUILTIN_VFMADDPD128_MASK,
29674 IX86_BUILTIN_VFMADDPD128_MASK3,
29675 IX86_BUILTIN_VFMADDPD128_MASKZ,
29676 IX86_BUILTIN_VFMADDPS256_MASK,
29677 IX86_BUILTIN_VFMADDPS256_MASK3,
29678 IX86_BUILTIN_VFMADDPS256_MASKZ,
29679 IX86_BUILTIN_VFMADDPS128_MASK,
29680 IX86_BUILTIN_VFMADDPS128_MASK3,
29681 IX86_BUILTIN_VFMADDPS128_MASKZ,
29682 IX86_BUILTIN_VFMSUBPD256_MASK3,
29683 IX86_BUILTIN_VFMSUBPD128_MASK3,
29684 IX86_BUILTIN_VFMSUBPS256_MASK3,
29685 IX86_BUILTIN_VFMSUBPS128_MASK3,
29686 IX86_BUILTIN_VFNMADDPD256_MASK,
29687 IX86_BUILTIN_VFNMADDPD128_MASK,
29688 IX86_BUILTIN_VFNMADDPS256_MASK,
29689 IX86_BUILTIN_VFNMADDPS128_MASK,
29690 IX86_BUILTIN_VFNMSUBPD256_MASK,
29691 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29692 IX86_BUILTIN_VFNMSUBPD128_MASK,
29693 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29694 IX86_BUILTIN_VFNMSUBPS256_MASK,
29695 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29696 IX86_BUILTIN_VFNMSUBPS128_MASK,
29697 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29698 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29699 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29700 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29701 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29702 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29703 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29704 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29705 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29706 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29707 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29708 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29709 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29710 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29711 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29712 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29713 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29714 IX86_BUILTIN_INSERTF64X2_256,
29715 IX86_BUILTIN_INSERTI64X2_256,
29716 IX86_BUILTIN_PSRAVV16HI,
29717 IX86_BUILTIN_PSRAVV8HI,
29718 IX86_BUILTIN_PMADDUBSW256_MASK,
29719 IX86_BUILTIN_PMADDUBSW128_MASK,
29720 IX86_BUILTIN_PMADDWD256_MASK,
29721 IX86_BUILTIN_PMADDWD128_MASK,
29722 IX86_BUILTIN_PSRLVV16HI,
29723 IX86_BUILTIN_PSRLVV8HI,
29724 IX86_BUILTIN_CVTPS2DQ256_MASK,
29725 IX86_BUILTIN_CVTPS2DQ128_MASK,
29726 IX86_BUILTIN_CVTPS2UDQ256,
29727 IX86_BUILTIN_CVTPS2UDQ128,
29728 IX86_BUILTIN_CVTPS2QQ256,
29729 IX86_BUILTIN_CVTPS2QQ128,
29730 IX86_BUILTIN_CVTPS2UQQ256,
29731 IX86_BUILTIN_CVTPS2UQQ128,
29732 IX86_BUILTIN_GETMANTPS256,
29733 IX86_BUILTIN_GETMANTPS128,
29734 IX86_BUILTIN_GETMANTPD256,
29735 IX86_BUILTIN_GETMANTPD128,
29736 IX86_BUILTIN_MOVDDUP256_MASK,
29737 IX86_BUILTIN_MOVDDUP128_MASK,
29738 IX86_BUILTIN_MOVSHDUP256_MASK,
29739 IX86_BUILTIN_MOVSHDUP128_MASK,
29740 IX86_BUILTIN_MOVSLDUP256_MASK,
29741 IX86_BUILTIN_MOVSLDUP128_MASK,
29742 IX86_BUILTIN_CVTQQ2PS256,
29743 IX86_BUILTIN_CVTQQ2PS128,
29744 IX86_BUILTIN_CVTUQQ2PS256,
29745 IX86_BUILTIN_CVTUQQ2PS128,
29746 IX86_BUILTIN_CVTQQ2PD256,
29747 IX86_BUILTIN_CVTQQ2PD128,
29748 IX86_BUILTIN_CVTUQQ2PD256,
29749 IX86_BUILTIN_CVTUQQ2PD128,
29750 IX86_BUILTIN_VPERMT2VARQ256,
29751 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29752 IX86_BUILTIN_VPERMT2VARD256,
29753 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29754 IX86_BUILTIN_VPERMI2VARQ256,
29755 IX86_BUILTIN_VPERMI2VARD256,
29756 IX86_BUILTIN_VPERMT2VARPD256,
29757 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29758 IX86_BUILTIN_VPERMT2VARPS256,
29759 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29760 IX86_BUILTIN_VPERMI2VARPD256,
29761 IX86_BUILTIN_VPERMI2VARPS256,
29762 IX86_BUILTIN_VPERMT2VARQ128,
29763 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29764 IX86_BUILTIN_VPERMT2VARD128,
29765 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29766 IX86_BUILTIN_VPERMI2VARQ128,
29767 IX86_BUILTIN_VPERMI2VARD128,
29768 IX86_BUILTIN_VPERMT2VARPD128,
29769 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29770 IX86_BUILTIN_VPERMT2VARPS128,
29771 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29772 IX86_BUILTIN_VPERMI2VARPD128,
29773 IX86_BUILTIN_VPERMI2VARPS128,
29774 IX86_BUILTIN_PSHUFB256_MASK,
29775 IX86_BUILTIN_PSHUFB128_MASK,
29776 IX86_BUILTIN_PSHUFHW256_MASK,
29777 IX86_BUILTIN_PSHUFHW128_MASK,
29778 IX86_BUILTIN_PSHUFLW256_MASK,
29779 IX86_BUILTIN_PSHUFLW128_MASK,
29780 IX86_BUILTIN_PSHUFD256_MASK,
29781 IX86_BUILTIN_PSHUFD128_MASK,
29782 IX86_BUILTIN_SHUFPD256_MASK,
29783 IX86_BUILTIN_SHUFPD128_MASK,
29784 IX86_BUILTIN_SHUFPS256_MASK,
29785 IX86_BUILTIN_SHUFPS128_MASK,
29786 IX86_BUILTIN_PROLVQ256,
29787 IX86_BUILTIN_PROLVQ128,
29788 IX86_BUILTIN_PROLQ256,
29789 IX86_BUILTIN_PROLQ128,
29790 IX86_BUILTIN_PRORVQ256,
29791 IX86_BUILTIN_PRORVQ128,
29792 IX86_BUILTIN_PRORQ256,
29793 IX86_BUILTIN_PRORQ128,
29794 IX86_BUILTIN_PSRAVQ128,
29795 IX86_BUILTIN_PSRAVQ256,
29796 IX86_BUILTIN_PSLLVV4DI_MASK,
29797 IX86_BUILTIN_PSLLVV2DI_MASK,
29798 IX86_BUILTIN_PSLLVV8SI_MASK,
29799 IX86_BUILTIN_PSLLVV4SI_MASK,
29800 IX86_BUILTIN_PSRAVV8SI_MASK,
29801 IX86_BUILTIN_PSRAVV4SI_MASK,
29802 IX86_BUILTIN_PSRLVV4DI_MASK,
29803 IX86_BUILTIN_PSRLVV2DI_MASK,
29804 IX86_BUILTIN_PSRLVV8SI_MASK,
29805 IX86_BUILTIN_PSRLVV4SI_MASK,
29806 IX86_BUILTIN_PSRAWI256_MASK,
29807 IX86_BUILTIN_PSRAW256_MASK,
29808 IX86_BUILTIN_PSRAWI128_MASK,
29809 IX86_BUILTIN_PSRAW128_MASK,
29810 IX86_BUILTIN_PSRLWI256_MASK,
29811 IX86_BUILTIN_PSRLW256_MASK,
29812 IX86_BUILTIN_PSRLWI128_MASK,
29813 IX86_BUILTIN_PSRLW128_MASK,
29814 IX86_BUILTIN_PRORVD256,
29815 IX86_BUILTIN_PROLVD256,
29816 IX86_BUILTIN_PRORD256,
29817 IX86_BUILTIN_PROLD256,
29818 IX86_BUILTIN_PRORVD128,
29819 IX86_BUILTIN_PROLVD128,
29820 IX86_BUILTIN_PRORD128,
29821 IX86_BUILTIN_PROLD128,
29822 IX86_BUILTIN_FPCLASSPD256,
29823 IX86_BUILTIN_FPCLASSPD128,
29824 IX86_BUILTIN_FPCLASSSD,
29825 IX86_BUILTIN_FPCLASSPS256,
29826 IX86_BUILTIN_FPCLASSPS128,
29827 IX86_BUILTIN_FPCLASSSS,
29828 IX86_BUILTIN_CVTB2MASK128,
29829 IX86_BUILTIN_CVTB2MASK256,
29830 IX86_BUILTIN_CVTW2MASK128,
29831 IX86_BUILTIN_CVTW2MASK256,
29832 IX86_BUILTIN_CVTD2MASK128,
29833 IX86_BUILTIN_CVTD2MASK256,
29834 IX86_BUILTIN_CVTQ2MASK128,
29835 IX86_BUILTIN_CVTQ2MASK256,
29836 IX86_BUILTIN_CVTMASK2B128,
29837 IX86_BUILTIN_CVTMASK2B256,
29838 IX86_BUILTIN_CVTMASK2W128,
29839 IX86_BUILTIN_CVTMASK2W256,
29840 IX86_BUILTIN_CVTMASK2D128,
29841 IX86_BUILTIN_CVTMASK2D256,
29842 IX86_BUILTIN_CVTMASK2Q128,
29843 IX86_BUILTIN_CVTMASK2Q256,
29844 IX86_BUILTIN_PCMPEQB128_MASK,
29845 IX86_BUILTIN_PCMPEQB256_MASK,
29846 IX86_BUILTIN_PCMPEQW128_MASK,
29847 IX86_BUILTIN_PCMPEQW256_MASK,
29848 IX86_BUILTIN_PCMPEQD128_MASK,
29849 IX86_BUILTIN_PCMPEQD256_MASK,
29850 IX86_BUILTIN_PCMPEQQ128_MASK,
29851 IX86_BUILTIN_PCMPEQQ256_MASK,
29852 IX86_BUILTIN_PCMPGTB128_MASK,
29853 IX86_BUILTIN_PCMPGTB256_MASK,
29854 IX86_BUILTIN_PCMPGTW128_MASK,
29855 IX86_BUILTIN_PCMPGTW256_MASK,
29856 IX86_BUILTIN_PCMPGTD128_MASK,
29857 IX86_BUILTIN_PCMPGTD256_MASK,
29858 IX86_BUILTIN_PCMPGTQ128_MASK,
29859 IX86_BUILTIN_PCMPGTQ256_MASK,
29860 IX86_BUILTIN_PTESTMB128,
29861 IX86_BUILTIN_PTESTMB256,
29862 IX86_BUILTIN_PTESTMW128,
29863 IX86_BUILTIN_PTESTMW256,
29864 IX86_BUILTIN_PTESTMD128,
29865 IX86_BUILTIN_PTESTMD256,
29866 IX86_BUILTIN_PTESTMQ128,
29867 IX86_BUILTIN_PTESTMQ256,
29868 IX86_BUILTIN_PTESTNMB128,
29869 IX86_BUILTIN_PTESTNMB256,
29870 IX86_BUILTIN_PTESTNMW128,
29871 IX86_BUILTIN_PTESTNMW256,
29872 IX86_BUILTIN_PTESTNMD128,
29873 IX86_BUILTIN_PTESTNMD256,
29874 IX86_BUILTIN_PTESTNMQ128,
29875 IX86_BUILTIN_PTESTNMQ256,
29876 IX86_BUILTIN_PBROADCASTMB128,
29877 IX86_BUILTIN_PBROADCASTMB256,
29878 IX86_BUILTIN_PBROADCASTMW128,
29879 IX86_BUILTIN_PBROADCASTMW256,
29880 IX86_BUILTIN_COMPRESSPD256,
29881 IX86_BUILTIN_COMPRESSPD128,
29882 IX86_BUILTIN_COMPRESSPS256,
29883 IX86_BUILTIN_COMPRESSPS128,
29884 IX86_BUILTIN_PCOMPRESSQ256,
29885 IX86_BUILTIN_PCOMPRESSQ128,
29886 IX86_BUILTIN_PCOMPRESSD256,
29887 IX86_BUILTIN_PCOMPRESSD128,
29888 IX86_BUILTIN_EXPANDPD256,
29889 IX86_BUILTIN_EXPANDPD128,
29890 IX86_BUILTIN_EXPANDPS256,
29891 IX86_BUILTIN_EXPANDPS128,
29892 IX86_BUILTIN_PEXPANDQ256,
29893 IX86_BUILTIN_PEXPANDQ128,
29894 IX86_BUILTIN_PEXPANDD256,
29895 IX86_BUILTIN_PEXPANDD128,
29896 IX86_BUILTIN_EXPANDPD256Z,
29897 IX86_BUILTIN_EXPANDPD128Z,
29898 IX86_BUILTIN_EXPANDPS256Z,
29899 IX86_BUILTIN_EXPANDPS128Z,
29900 IX86_BUILTIN_PEXPANDQ256Z,
29901 IX86_BUILTIN_PEXPANDQ128Z,
29902 IX86_BUILTIN_PEXPANDD256Z,
29903 IX86_BUILTIN_PEXPANDD128Z,
29904 IX86_BUILTIN_PMAXSD256_MASK,
29905 IX86_BUILTIN_PMINSD256_MASK,
29906 IX86_BUILTIN_PMAXUD256_MASK,
29907 IX86_BUILTIN_PMINUD256_MASK,
29908 IX86_BUILTIN_PMAXSD128_MASK,
29909 IX86_BUILTIN_PMINSD128_MASK,
29910 IX86_BUILTIN_PMAXUD128_MASK,
29911 IX86_BUILTIN_PMINUD128_MASK,
29912 IX86_BUILTIN_PMAXSQ256_MASK,
29913 IX86_BUILTIN_PMINSQ256_MASK,
29914 IX86_BUILTIN_PMAXUQ256_MASK,
29915 IX86_BUILTIN_PMINUQ256_MASK,
29916 IX86_BUILTIN_PMAXSQ128_MASK,
29917 IX86_BUILTIN_PMINSQ128_MASK,
29918 IX86_BUILTIN_PMAXUQ128_MASK,
29919 IX86_BUILTIN_PMINUQ128_MASK,
29920 IX86_BUILTIN_PMINSB256_MASK,
29921 IX86_BUILTIN_PMINUB256_MASK,
29922 IX86_BUILTIN_PMAXSB256_MASK,
29923 IX86_BUILTIN_PMAXUB256_MASK,
29924 IX86_BUILTIN_PMINSB128_MASK,
29925 IX86_BUILTIN_PMINUB128_MASK,
29926 IX86_BUILTIN_PMAXSB128_MASK,
29927 IX86_BUILTIN_PMAXUB128_MASK,
29928 IX86_BUILTIN_PMINSW256_MASK,
29929 IX86_BUILTIN_PMINUW256_MASK,
29930 IX86_BUILTIN_PMAXSW256_MASK,
29931 IX86_BUILTIN_PMAXUW256_MASK,
29932 IX86_BUILTIN_PMINSW128_MASK,
29933 IX86_BUILTIN_PMINUW128_MASK,
29934 IX86_BUILTIN_PMAXSW128_MASK,
29935 IX86_BUILTIN_PMAXUW128_MASK,
29936 IX86_BUILTIN_VPCONFLICTQ256,
29937 IX86_BUILTIN_VPCONFLICTD256,
29938 IX86_BUILTIN_VPCLZCNTQ256,
29939 IX86_BUILTIN_VPCLZCNTD256,
29940 IX86_BUILTIN_UNPCKHPD256_MASK,
29941 IX86_BUILTIN_UNPCKHPD128_MASK,
29942 IX86_BUILTIN_UNPCKHPS256_MASK,
29943 IX86_BUILTIN_UNPCKHPS128_MASK,
29944 IX86_BUILTIN_UNPCKLPD256_MASK,
29945 IX86_BUILTIN_UNPCKLPD128_MASK,
29946 IX86_BUILTIN_UNPCKLPS256_MASK,
29947 IX86_BUILTIN_VPCONFLICTQ128,
29948 IX86_BUILTIN_VPCONFLICTD128,
29949 IX86_BUILTIN_VPCLZCNTQ128,
29950 IX86_BUILTIN_VPCLZCNTD128,
29951 IX86_BUILTIN_UNPCKLPS128_MASK,
29952 IX86_BUILTIN_ALIGND256,
29953 IX86_BUILTIN_ALIGNQ256,
29954 IX86_BUILTIN_ALIGND128,
29955 IX86_BUILTIN_ALIGNQ128,
29956 IX86_BUILTIN_CVTPS2PH256_MASK,
29957 IX86_BUILTIN_CVTPS2PH_MASK,
29958 IX86_BUILTIN_CVTPH2PS_MASK,
29959 IX86_BUILTIN_CVTPH2PS256_MASK,
29960 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29961 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29962 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29963 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29964 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29965 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29966 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29967 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29968 IX86_BUILTIN_PUNPCKHBW128_MASK,
29969 IX86_BUILTIN_PUNPCKHBW256_MASK,
29970 IX86_BUILTIN_PUNPCKHWD128_MASK,
29971 IX86_BUILTIN_PUNPCKHWD256_MASK,
29972 IX86_BUILTIN_PUNPCKLBW128_MASK,
29973 IX86_BUILTIN_PUNPCKLBW256_MASK,
29974 IX86_BUILTIN_PUNPCKLWD128_MASK,
29975 IX86_BUILTIN_PUNPCKLWD256_MASK,
29976 IX86_BUILTIN_PSLLVV16HI,
29977 IX86_BUILTIN_PSLLVV8HI,
29978 IX86_BUILTIN_PACKSSDW256_MASK,
29979 IX86_BUILTIN_PACKSSDW128_MASK,
29980 IX86_BUILTIN_PACKUSDW256_MASK,
29981 IX86_BUILTIN_PACKUSDW128_MASK,
29982 IX86_BUILTIN_PAVGB256_MASK,
29983 IX86_BUILTIN_PAVGW256_MASK,
29984 IX86_BUILTIN_PAVGB128_MASK,
29985 IX86_BUILTIN_PAVGW128_MASK,
29986 IX86_BUILTIN_VPERMVARSF256_MASK,
29987 IX86_BUILTIN_VPERMVARDF256_MASK,
29988 IX86_BUILTIN_VPERMDF256_MASK,
29989 IX86_BUILTIN_PABSB256_MASK,
29990 IX86_BUILTIN_PABSB128_MASK,
29991 IX86_BUILTIN_PABSW256_MASK,
29992 IX86_BUILTIN_PABSW128_MASK,
29993 IX86_BUILTIN_VPERMILVARPD_MASK,
29994 IX86_BUILTIN_VPERMILVARPS_MASK,
29995 IX86_BUILTIN_VPERMILVARPD256_MASK,
29996 IX86_BUILTIN_VPERMILVARPS256_MASK,
29997 IX86_BUILTIN_VPERMILPD_MASK,
29998 IX86_BUILTIN_VPERMILPS_MASK,
29999 IX86_BUILTIN_VPERMILPD256_MASK,
30000 IX86_BUILTIN_VPERMILPS256_MASK,
30001 IX86_BUILTIN_BLENDMQ256,
30002 IX86_BUILTIN_BLENDMD256,
30003 IX86_BUILTIN_BLENDMPD256,
30004 IX86_BUILTIN_BLENDMPS256,
30005 IX86_BUILTIN_BLENDMQ128,
30006 IX86_BUILTIN_BLENDMD128,
30007 IX86_BUILTIN_BLENDMPD128,
30008 IX86_BUILTIN_BLENDMPS128,
30009 IX86_BUILTIN_BLENDMW256,
30010 IX86_BUILTIN_BLENDMB256,
30011 IX86_BUILTIN_BLENDMW128,
30012 IX86_BUILTIN_BLENDMB128,
30013 IX86_BUILTIN_PMULLD256_MASK,
30014 IX86_BUILTIN_PMULLD128_MASK,
30015 IX86_BUILTIN_PMULUDQ256_MASK,
30016 IX86_BUILTIN_PMULDQ256_MASK,
30017 IX86_BUILTIN_PMULDQ128_MASK,
30018 IX86_BUILTIN_PMULUDQ128_MASK,
30019 IX86_BUILTIN_CVTPD2PS256_MASK,
30020 IX86_BUILTIN_CVTPD2PS_MASK,
30021 IX86_BUILTIN_VPERMVARSI256_MASK,
30022 IX86_BUILTIN_VPERMVARDI256_MASK,
30023 IX86_BUILTIN_VPERMDI256_MASK,
30024 IX86_BUILTIN_CMPQ256,
30025 IX86_BUILTIN_CMPD256,
30026 IX86_BUILTIN_UCMPQ256,
30027 IX86_BUILTIN_UCMPD256,
30028 IX86_BUILTIN_CMPB256,
30029 IX86_BUILTIN_CMPW256,
30030 IX86_BUILTIN_UCMPB256,
30031 IX86_BUILTIN_UCMPW256,
30032 IX86_BUILTIN_CMPPD256_MASK,
30033 IX86_BUILTIN_CMPPS256_MASK,
30034 IX86_BUILTIN_CMPQ128,
30035 IX86_BUILTIN_CMPD128,
30036 IX86_BUILTIN_UCMPQ128,
30037 IX86_BUILTIN_UCMPD128,
30038 IX86_BUILTIN_CMPB128,
30039 IX86_BUILTIN_CMPW128,
30040 IX86_BUILTIN_UCMPB128,
30041 IX86_BUILTIN_UCMPW128,
30042 IX86_BUILTIN_CMPPD128_MASK,
30043 IX86_BUILTIN_CMPPS128_MASK,
30045 IX86_BUILTIN_GATHER3SIV8SF,
30046 IX86_BUILTIN_GATHER3SIV4SF,
30047 IX86_BUILTIN_GATHER3SIV4DF,
30048 IX86_BUILTIN_GATHER3SIV2DF,
30049 IX86_BUILTIN_GATHER3DIV8SF,
30050 IX86_BUILTIN_GATHER3DIV4SF,
30051 IX86_BUILTIN_GATHER3DIV4DF,
30052 IX86_BUILTIN_GATHER3DIV2DF,
30053 IX86_BUILTIN_GATHER3SIV8SI,
30054 IX86_BUILTIN_GATHER3SIV4SI,
30055 IX86_BUILTIN_GATHER3SIV4DI,
30056 IX86_BUILTIN_GATHER3SIV2DI,
30057 IX86_BUILTIN_GATHER3DIV8SI,
30058 IX86_BUILTIN_GATHER3DIV4SI,
30059 IX86_BUILTIN_GATHER3DIV4DI,
30060 IX86_BUILTIN_GATHER3DIV2DI,
30061 IX86_BUILTIN_SCATTERSIV8SF,
30062 IX86_BUILTIN_SCATTERSIV4SF,
30063 IX86_BUILTIN_SCATTERSIV4DF,
30064 IX86_BUILTIN_SCATTERSIV2DF,
30065 IX86_BUILTIN_SCATTERDIV8SF,
30066 IX86_BUILTIN_SCATTERDIV4SF,
30067 IX86_BUILTIN_SCATTERDIV4DF,
30068 IX86_BUILTIN_SCATTERDIV2DF,
30069 IX86_BUILTIN_SCATTERSIV8SI,
30070 IX86_BUILTIN_SCATTERSIV4SI,
30071 IX86_BUILTIN_SCATTERSIV4DI,
30072 IX86_BUILTIN_SCATTERSIV2DI,
30073 IX86_BUILTIN_SCATTERDIV8SI,
30074 IX86_BUILTIN_SCATTERDIV4SI,
30075 IX86_BUILTIN_SCATTERDIV4DI,
30076 IX86_BUILTIN_SCATTERDIV2DI,
30078 /* AVX512DQ. */
30079 IX86_BUILTIN_RANGESD128,
30080 IX86_BUILTIN_RANGESS128,
30081 IX86_BUILTIN_KUNPCKWD,
30082 IX86_BUILTIN_KUNPCKDQ,
30083 IX86_BUILTIN_BROADCASTF32x2_512,
30084 IX86_BUILTIN_BROADCASTI32x2_512,
30085 IX86_BUILTIN_BROADCASTF64X2_512,
30086 IX86_BUILTIN_BROADCASTI64X2_512,
30087 IX86_BUILTIN_BROADCASTF32X8_512,
30088 IX86_BUILTIN_BROADCASTI32X8_512,
30089 IX86_BUILTIN_EXTRACTF64X2_512,
30090 IX86_BUILTIN_EXTRACTF32X8,
30091 IX86_BUILTIN_EXTRACTI64X2_512,
30092 IX86_BUILTIN_EXTRACTI32X8,
30093 IX86_BUILTIN_REDUCEPD512_MASK,
30094 IX86_BUILTIN_REDUCEPS512_MASK,
30095 IX86_BUILTIN_PMULLQ512,
30096 IX86_BUILTIN_XORPD512,
30097 IX86_BUILTIN_XORPS512,
30098 IX86_BUILTIN_ORPD512,
30099 IX86_BUILTIN_ORPS512,
30100 IX86_BUILTIN_ANDPD512,
30101 IX86_BUILTIN_ANDPS512,
30102 IX86_BUILTIN_ANDNPD512,
30103 IX86_BUILTIN_ANDNPS512,
30104 IX86_BUILTIN_INSERTF32X8,
30105 IX86_BUILTIN_INSERTI32X8,
30106 IX86_BUILTIN_INSERTF64X2_512,
30107 IX86_BUILTIN_INSERTI64X2_512,
30108 IX86_BUILTIN_FPCLASSPD512,
30109 IX86_BUILTIN_FPCLASSPS512,
30110 IX86_BUILTIN_CVTD2MASK512,
30111 IX86_BUILTIN_CVTQ2MASK512,
30112 IX86_BUILTIN_CVTMASK2D512,
30113 IX86_BUILTIN_CVTMASK2Q512,
30114 IX86_BUILTIN_CVTPD2QQ512,
30115 IX86_BUILTIN_CVTPS2QQ512,
30116 IX86_BUILTIN_CVTPD2UQQ512,
30117 IX86_BUILTIN_CVTPS2UQQ512,
30118 IX86_BUILTIN_CVTQQ2PS512,
30119 IX86_BUILTIN_CVTUQQ2PS512,
30120 IX86_BUILTIN_CVTQQ2PD512,
30121 IX86_BUILTIN_CVTUQQ2PD512,
30122 IX86_BUILTIN_CVTTPS2QQ512,
30123 IX86_BUILTIN_CVTTPS2UQQ512,
30124 IX86_BUILTIN_CVTTPD2QQ512,
30125 IX86_BUILTIN_CVTTPD2UQQ512,
30126 IX86_BUILTIN_RANGEPS512,
30127 IX86_BUILTIN_RANGEPD512,
30129 /* AVX512BW. */
30130 IX86_BUILTIN_PACKUSDW512,
30131 IX86_BUILTIN_PACKSSDW512,
30132 IX86_BUILTIN_LOADDQUHI512_MASK,
30133 IX86_BUILTIN_LOADDQUQI512_MASK,
30134 IX86_BUILTIN_PSLLDQ512,
30135 IX86_BUILTIN_PSRLDQ512,
30136 IX86_BUILTIN_STOREDQUHI512_MASK,
30137 IX86_BUILTIN_STOREDQUQI512_MASK,
30138 IX86_BUILTIN_PALIGNR512,
30139 IX86_BUILTIN_PALIGNR512_MASK,
30140 IX86_BUILTIN_MOVDQUHI512_MASK,
30141 IX86_BUILTIN_MOVDQUQI512_MASK,
30142 IX86_BUILTIN_PSADBW512,
30143 IX86_BUILTIN_DBPSADBW512,
30144 IX86_BUILTIN_PBROADCASTB512,
30145 IX86_BUILTIN_PBROADCASTB512_GPR,
30146 IX86_BUILTIN_PBROADCASTW512,
30147 IX86_BUILTIN_PBROADCASTW512_GPR,
30148 IX86_BUILTIN_PMOVSXBW512_MASK,
30149 IX86_BUILTIN_PMOVZXBW512_MASK,
30150 IX86_BUILTIN_VPERMVARHI512_MASK,
30151 IX86_BUILTIN_VPERMT2VARHI512,
30152 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30153 IX86_BUILTIN_VPERMI2VARHI512,
30154 IX86_BUILTIN_PAVGB512,
30155 IX86_BUILTIN_PAVGW512,
30156 IX86_BUILTIN_PADDB512,
30157 IX86_BUILTIN_PSUBB512,
30158 IX86_BUILTIN_PSUBSB512,
30159 IX86_BUILTIN_PADDSB512,
30160 IX86_BUILTIN_PSUBUSB512,
30161 IX86_BUILTIN_PADDUSB512,
30162 IX86_BUILTIN_PSUBW512,
30163 IX86_BUILTIN_PADDW512,
30164 IX86_BUILTIN_PSUBSW512,
30165 IX86_BUILTIN_PADDSW512,
30166 IX86_BUILTIN_PSUBUSW512,
30167 IX86_BUILTIN_PADDUSW512,
30168 IX86_BUILTIN_PMAXUW512,
30169 IX86_BUILTIN_PMAXSW512,
30170 IX86_BUILTIN_PMINUW512,
30171 IX86_BUILTIN_PMINSW512,
30172 IX86_BUILTIN_PMAXUB512,
30173 IX86_BUILTIN_PMAXSB512,
30174 IX86_BUILTIN_PMINUB512,
30175 IX86_BUILTIN_PMINSB512,
30176 IX86_BUILTIN_PMOVWB512,
30177 IX86_BUILTIN_PMOVSWB512,
30178 IX86_BUILTIN_PMOVUSWB512,
30179 IX86_BUILTIN_PMULHRSW512_MASK,
30180 IX86_BUILTIN_PMULHUW512_MASK,
30181 IX86_BUILTIN_PMULHW512_MASK,
30182 IX86_BUILTIN_PMULLW512_MASK,
30183 IX86_BUILTIN_PSLLWI512_MASK,
30184 IX86_BUILTIN_PSLLW512_MASK,
30185 IX86_BUILTIN_PACKSSWB512,
30186 IX86_BUILTIN_PACKUSWB512,
30187 IX86_BUILTIN_PSRAVV32HI,
30188 IX86_BUILTIN_PMADDUBSW512_MASK,
30189 IX86_BUILTIN_PMADDWD512_MASK,
30190 IX86_BUILTIN_PSRLVV32HI,
30191 IX86_BUILTIN_PUNPCKHBW512,
30192 IX86_BUILTIN_PUNPCKHWD512,
30193 IX86_BUILTIN_PUNPCKLBW512,
30194 IX86_BUILTIN_PUNPCKLWD512,
30195 IX86_BUILTIN_PSHUFB512,
30196 IX86_BUILTIN_PSHUFHW512,
30197 IX86_BUILTIN_PSHUFLW512,
30198 IX86_BUILTIN_PSRAWI512,
30199 IX86_BUILTIN_PSRAW512,
30200 IX86_BUILTIN_PSRLWI512,
30201 IX86_BUILTIN_PSRLW512,
30202 IX86_BUILTIN_CVTB2MASK512,
30203 IX86_BUILTIN_CVTW2MASK512,
30204 IX86_BUILTIN_CVTMASK2B512,
30205 IX86_BUILTIN_CVTMASK2W512,
30206 IX86_BUILTIN_PCMPEQB512_MASK,
30207 IX86_BUILTIN_PCMPEQW512_MASK,
30208 IX86_BUILTIN_PCMPGTB512_MASK,
30209 IX86_BUILTIN_PCMPGTW512_MASK,
30210 IX86_BUILTIN_PTESTMB512,
30211 IX86_BUILTIN_PTESTMW512,
30212 IX86_BUILTIN_PTESTNMB512,
30213 IX86_BUILTIN_PTESTNMW512,
30214 IX86_BUILTIN_PSLLVV32HI,
30215 IX86_BUILTIN_PABSB512,
30216 IX86_BUILTIN_PABSW512,
30217 IX86_BUILTIN_BLENDMW512,
30218 IX86_BUILTIN_BLENDMB512,
30219 IX86_BUILTIN_CMPB512,
30220 IX86_BUILTIN_CMPW512,
30221 IX86_BUILTIN_UCMPB512,
30222 IX86_BUILTIN_UCMPW512,
30224 /* Alternate 4- and 8-element gather/scatter for the vectorizer
30225 where all operands are 32-byte or 64-byte wide respectively. */
30226 IX86_BUILTIN_GATHERALTSIV4DF,
30227 IX86_BUILTIN_GATHERALTDIV8SF,
30228 IX86_BUILTIN_GATHERALTSIV4DI,
30229 IX86_BUILTIN_GATHERALTDIV8SI,
30230 IX86_BUILTIN_GATHER3ALTDIV16SF,
30231 IX86_BUILTIN_GATHER3ALTDIV16SI,
30232 IX86_BUILTIN_GATHER3ALTSIV4DF,
30233 IX86_BUILTIN_GATHER3ALTDIV8SF,
30234 IX86_BUILTIN_GATHER3ALTSIV4DI,
30235 IX86_BUILTIN_GATHER3ALTDIV8SI,
30236 IX86_BUILTIN_GATHER3ALTSIV8DF,
30237 IX86_BUILTIN_GATHER3ALTSIV8DI,
30238 IX86_BUILTIN_GATHER3DIV16SF,
30239 IX86_BUILTIN_GATHER3DIV16SI,
30240 IX86_BUILTIN_GATHER3DIV8DF,
30241 IX86_BUILTIN_GATHER3DIV8DI,
30242 IX86_BUILTIN_GATHER3SIV16SF,
30243 IX86_BUILTIN_GATHER3SIV16SI,
30244 IX86_BUILTIN_GATHER3SIV8DF,
30245 IX86_BUILTIN_GATHER3SIV8DI,
30246 IX86_BUILTIN_SCATTERDIV16SF,
30247 IX86_BUILTIN_SCATTERDIV16SI,
30248 IX86_BUILTIN_SCATTERDIV8DF,
30249 IX86_BUILTIN_SCATTERDIV8DI,
30250 IX86_BUILTIN_SCATTERSIV16SF,
30251 IX86_BUILTIN_SCATTERSIV16SI,
30252 IX86_BUILTIN_SCATTERSIV8DF,
30253 IX86_BUILTIN_SCATTERSIV8DI,
30255 /* AVX512PF */
30256 IX86_BUILTIN_GATHERPFQPD,
30257 IX86_BUILTIN_GATHERPFDPS,
30258 IX86_BUILTIN_GATHERPFDPD,
30259 IX86_BUILTIN_GATHERPFQPS,
30260 IX86_BUILTIN_SCATTERPFDPD,
30261 IX86_BUILTIN_SCATTERPFDPS,
30262 IX86_BUILTIN_SCATTERPFQPD,
30263 IX86_BUILTIN_SCATTERPFQPS,
30265 /* AVX-512ER */
30266 IX86_BUILTIN_EXP2PD_MASK,
30267 IX86_BUILTIN_EXP2PS_MASK,
30268 IX86_BUILTIN_EXP2PS,
30269 IX86_BUILTIN_RCP28PD,
30270 IX86_BUILTIN_RCP28PS,
30271 IX86_BUILTIN_RCP28SD,
30272 IX86_BUILTIN_RCP28SS,
30273 IX86_BUILTIN_RSQRT28PD,
30274 IX86_BUILTIN_RSQRT28PS,
30275 IX86_BUILTIN_RSQRT28SD,
30276 IX86_BUILTIN_RSQRT28SS,
30278 /* AVX-512IFMA */
30279 IX86_BUILTIN_VPMADD52LUQ512,
30280 IX86_BUILTIN_VPMADD52HUQ512,
30281 IX86_BUILTIN_VPMADD52LUQ256,
30282 IX86_BUILTIN_VPMADD52HUQ256,
30283 IX86_BUILTIN_VPMADD52LUQ128,
30284 IX86_BUILTIN_VPMADD52HUQ128,
30285 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30286 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30287 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30288 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30289 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30290 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30292 /* AVX-512VBMI */
30293 IX86_BUILTIN_VPMULTISHIFTQB512,
30294 IX86_BUILTIN_VPMULTISHIFTQB256,
30295 IX86_BUILTIN_VPMULTISHIFTQB128,
30296 IX86_BUILTIN_VPERMVARQI512_MASK,
30297 IX86_BUILTIN_VPERMT2VARQI512,
30298 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30299 IX86_BUILTIN_VPERMI2VARQI512,
30300 IX86_BUILTIN_VPERMVARQI256_MASK,
30301 IX86_BUILTIN_VPERMVARQI128_MASK,
30302 IX86_BUILTIN_VPERMT2VARQI256,
30303 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30304 IX86_BUILTIN_VPERMT2VARQI128,
30305 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30306 IX86_BUILTIN_VPERMI2VARQI256,
30307 IX86_BUILTIN_VPERMI2VARQI128,
30309 /* SHA builtins. */
30310 IX86_BUILTIN_SHA1MSG1,
30311 IX86_BUILTIN_SHA1MSG2,
30312 IX86_BUILTIN_SHA1NEXTE,
30313 IX86_BUILTIN_SHA1RNDS4,
30314 IX86_BUILTIN_SHA256MSG1,
30315 IX86_BUILTIN_SHA256MSG2,
30316 IX86_BUILTIN_SHA256RNDS2,
30318 /* CLWB instructions. */
30319 IX86_BUILTIN_CLWB,
30321 /* PCOMMIT instructions. */
30322 IX86_BUILTIN_PCOMMIT,
30324 /* CLFLUSHOPT instructions. */
30325 IX86_BUILTIN_CLFLUSHOPT,
30327 /* TFmode support builtins. */
30328 IX86_BUILTIN_INFQ,
30329 IX86_BUILTIN_HUGE_VALQ,
30330 IX86_BUILTIN_FABSQ,
30331 IX86_BUILTIN_COPYSIGNQ,
30333 /* Vectorizer support builtins. */
30334 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30335 IX86_BUILTIN_CPYSGNPS,
30336 IX86_BUILTIN_CPYSGNPD,
30337 IX86_BUILTIN_CPYSGNPS256,
30338 IX86_BUILTIN_CPYSGNPS512,
30339 IX86_BUILTIN_CPYSGNPD256,
30340 IX86_BUILTIN_CPYSGNPD512,
30341 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30342 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30345 /* FMA4 instructions. */
30346 IX86_BUILTIN_VFMADDSS,
30347 IX86_BUILTIN_VFMADDSD,
30348 IX86_BUILTIN_VFMADDPS,
30349 IX86_BUILTIN_VFMADDPD,
30350 IX86_BUILTIN_VFMADDPS256,
30351 IX86_BUILTIN_VFMADDPD256,
30352 IX86_BUILTIN_VFMADDSUBPS,
30353 IX86_BUILTIN_VFMADDSUBPD,
30354 IX86_BUILTIN_VFMADDSUBPS256,
30355 IX86_BUILTIN_VFMADDSUBPD256,
30357 /* FMA3 instructions. */
30358 IX86_BUILTIN_VFMADDSS3,
30359 IX86_BUILTIN_VFMADDSD3,
30361 /* XOP instructions. */
30362 IX86_BUILTIN_VPCMOV,
30363 IX86_BUILTIN_VPCMOV_V2DI,
30364 IX86_BUILTIN_VPCMOV_V4SI,
30365 IX86_BUILTIN_VPCMOV_V8HI,
30366 IX86_BUILTIN_VPCMOV_V16QI,
30367 IX86_BUILTIN_VPCMOV_V4SF,
30368 IX86_BUILTIN_VPCMOV_V2DF,
30369 IX86_BUILTIN_VPCMOV256,
30370 IX86_BUILTIN_VPCMOV_V4DI256,
30371 IX86_BUILTIN_VPCMOV_V8SI256,
30372 IX86_BUILTIN_VPCMOV_V16HI256,
30373 IX86_BUILTIN_VPCMOV_V32QI256,
30374 IX86_BUILTIN_VPCMOV_V8SF256,
30375 IX86_BUILTIN_VPCMOV_V4DF256,
30377 IX86_BUILTIN_VPPERM,
30379 IX86_BUILTIN_VPMACSSWW,
30380 IX86_BUILTIN_VPMACSWW,
30381 IX86_BUILTIN_VPMACSSWD,
30382 IX86_BUILTIN_VPMACSWD,
30383 IX86_BUILTIN_VPMACSSDD,
30384 IX86_BUILTIN_VPMACSDD,
30385 IX86_BUILTIN_VPMACSSDQL,
30386 IX86_BUILTIN_VPMACSSDQH,
30387 IX86_BUILTIN_VPMACSDQL,
30388 IX86_BUILTIN_VPMACSDQH,
30389 IX86_BUILTIN_VPMADCSSWD,
30390 IX86_BUILTIN_VPMADCSWD,
30392 IX86_BUILTIN_VPHADDBW,
30393 IX86_BUILTIN_VPHADDBD,
30394 IX86_BUILTIN_VPHADDBQ,
30395 IX86_BUILTIN_VPHADDWD,
30396 IX86_BUILTIN_VPHADDWQ,
30397 IX86_BUILTIN_VPHADDDQ,
30398 IX86_BUILTIN_VPHADDUBW,
30399 IX86_BUILTIN_VPHADDUBD,
30400 IX86_BUILTIN_VPHADDUBQ,
30401 IX86_BUILTIN_VPHADDUWD,
30402 IX86_BUILTIN_VPHADDUWQ,
30403 IX86_BUILTIN_VPHADDUDQ,
30404 IX86_BUILTIN_VPHSUBBW,
30405 IX86_BUILTIN_VPHSUBWD,
30406 IX86_BUILTIN_VPHSUBDQ,
30408 IX86_BUILTIN_VPROTB,
30409 IX86_BUILTIN_VPROTW,
30410 IX86_BUILTIN_VPROTD,
30411 IX86_BUILTIN_VPROTQ,
30412 IX86_BUILTIN_VPROTB_IMM,
30413 IX86_BUILTIN_VPROTW_IMM,
30414 IX86_BUILTIN_VPROTD_IMM,
30415 IX86_BUILTIN_VPROTQ_IMM,
30417 IX86_BUILTIN_VPSHLB,
30418 IX86_BUILTIN_VPSHLW,
30419 IX86_BUILTIN_VPSHLD,
30420 IX86_BUILTIN_VPSHLQ,
30421 IX86_BUILTIN_VPSHAB,
30422 IX86_BUILTIN_VPSHAW,
30423 IX86_BUILTIN_VPSHAD,
30424 IX86_BUILTIN_VPSHAQ,
30426 IX86_BUILTIN_VFRCZSS,
30427 IX86_BUILTIN_VFRCZSD,
30428 IX86_BUILTIN_VFRCZPS,
30429 IX86_BUILTIN_VFRCZPD,
30430 IX86_BUILTIN_VFRCZPS256,
30431 IX86_BUILTIN_VFRCZPD256,
30433 IX86_BUILTIN_VPCOMEQUB,
30434 IX86_BUILTIN_VPCOMNEUB,
30435 IX86_BUILTIN_VPCOMLTUB,
30436 IX86_BUILTIN_VPCOMLEUB,
30437 IX86_BUILTIN_VPCOMGTUB,
30438 IX86_BUILTIN_VPCOMGEUB,
30439 IX86_BUILTIN_VPCOMFALSEUB,
30440 IX86_BUILTIN_VPCOMTRUEUB,
30442 IX86_BUILTIN_VPCOMEQUW,
30443 IX86_BUILTIN_VPCOMNEUW,
30444 IX86_BUILTIN_VPCOMLTUW,
30445 IX86_BUILTIN_VPCOMLEUW,
30446 IX86_BUILTIN_VPCOMGTUW,
30447 IX86_BUILTIN_VPCOMGEUW,
30448 IX86_BUILTIN_VPCOMFALSEUW,
30449 IX86_BUILTIN_VPCOMTRUEUW,
30451 IX86_BUILTIN_VPCOMEQUD,
30452 IX86_BUILTIN_VPCOMNEUD,
30453 IX86_BUILTIN_VPCOMLTUD,
30454 IX86_BUILTIN_VPCOMLEUD,
30455 IX86_BUILTIN_VPCOMGTUD,
30456 IX86_BUILTIN_VPCOMGEUD,
30457 IX86_BUILTIN_VPCOMFALSEUD,
30458 IX86_BUILTIN_VPCOMTRUEUD,
30460 IX86_BUILTIN_VPCOMEQUQ,
30461 IX86_BUILTIN_VPCOMNEUQ,
30462 IX86_BUILTIN_VPCOMLTUQ,
30463 IX86_BUILTIN_VPCOMLEUQ,
30464 IX86_BUILTIN_VPCOMGTUQ,
30465 IX86_BUILTIN_VPCOMGEUQ,
30466 IX86_BUILTIN_VPCOMFALSEUQ,
30467 IX86_BUILTIN_VPCOMTRUEUQ,
30469 IX86_BUILTIN_VPCOMEQB,
30470 IX86_BUILTIN_VPCOMNEB,
30471 IX86_BUILTIN_VPCOMLTB,
30472 IX86_BUILTIN_VPCOMLEB,
30473 IX86_BUILTIN_VPCOMGTB,
30474 IX86_BUILTIN_VPCOMGEB,
30475 IX86_BUILTIN_VPCOMFALSEB,
30476 IX86_BUILTIN_VPCOMTRUEB,
30478 IX86_BUILTIN_VPCOMEQW,
30479 IX86_BUILTIN_VPCOMNEW,
30480 IX86_BUILTIN_VPCOMLTW,
30481 IX86_BUILTIN_VPCOMLEW,
30482 IX86_BUILTIN_VPCOMGTW,
30483 IX86_BUILTIN_VPCOMGEW,
30484 IX86_BUILTIN_VPCOMFALSEW,
30485 IX86_BUILTIN_VPCOMTRUEW,
30487 IX86_BUILTIN_VPCOMEQD,
30488 IX86_BUILTIN_VPCOMNED,
30489 IX86_BUILTIN_VPCOMLTD,
30490 IX86_BUILTIN_VPCOMLED,
30491 IX86_BUILTIN_VPCOMGTD,
30492 IX86_BUILTIN_VPCOMGED,
30493 IX86_BUILTIN_VPCOMFALSED,
30494 IX86_BUILTIN_VPCOMTRUED,
30496 IX86_BUILTIN_VPCOMEQQ,
30497 IX86_BUILTIN_VPCOMNEQ,
30498 IX86_BUILTIN_VPCOMLTQ,
30499 IX86_BUILTIN_VPCOMLEQ,
30500 IX86_BUILTIN_VPCOMGTQ,
30501 IX86_BUILTIN_VPCOMGEQ,
30502 IX86_BUILTIN_VPCOMFALSEQ,
30503 IX86_BUILTIN_VPCOMTRUEQ,
30505 /* LWP instructions. */
30506 IX86_BUILTIN_LLWPCB,
30507 IX86_BUILTIN_SLWPCB,
30508 IX86_BUILTIN_LWPVAL32,
30509 IX86_BUILTIN_LWPVAL64,
30510 IX86_BUILTIN_LWPINS32,
30511 IX86_BUILTIN_LWPINS64,
30513 IX86_BUILTIN_CLZS,
30515 /* RTM */
30516 IX86_BUILTIN_XBEGIN,
30517 IX86_BUILTIN_XEND,
30518 IX86_BUILTIN_XABORT,
30519 IX86_BUILTIN_XTEST,
30521 /* MPX */
30522 IX86_BUILTIN_BNDMK,
30523 IX86_BUILTIN_BNDSTX,
30524 IX86_BUILTIN_BNDLDX,
30525 IX86_BUILTIN_BNDCL,
30526 IX86_BUILTIN_BNDCU,
30527 IX86_BUILTIN_BNDRET,
30528 IX86_BUILTIN_BNDNARROW,
30529 IX86_BUILTIN_BNDINT,
30530 IX86_BUILTIN_SIZEOF,
30531 IX86_BUILTIN_BNDLOWER,
30532 IX86_BUILTIN_BNDUPPER,
30534 /* BMI instructions. */
30535 IX86_BUILTIN_BEXTR32,
30536 IX86_BUILTIN_BEXTR64,
30537 IX86_BUILTIN_CTZS,
30539 /* TBM instructions. */
30540 IX86_BUILTIN_BEXTRI32,
30541 IX86_BUILTIN_BEXTRI64,
30543 /* BMI2 instructions. */
30544 IX86_BUILTIN_BZHI32,
30545 IX86_BUILTIN_BZHI64,
30546 IX86_BUILTIN_PDEP32,
30547 IX86_BUILTIN_PDEP64,
30548 IX86_BUILTIN_PEXT32,
30549 IX86_BUILTIN_PEXT64,
30551 /* ADX instructions. */
30552 IX86_BUILTIN_ADDCARRYX32,
30553 IX86_BUILTIN_ADDCARRYX64,
30555 /* SBB instructions. */
30556 IX86_BUILTIN_SBB32,
30557 IX86_BUILTIN_SBB64,
30559 /* FSGSBASE instructions. */
30560 IX86_BUILTIN_RDFSBASE32,
30561 IX86_BUILTIN_RDFSBASE64,
30562 IX86_BUILTIN_RDGSBASE32,
30563 IX86_BUILTIN_RDGSBASE64,
30564 IX86_BUILTIN_WRFSBASE32,
30565 IX86_BUILTIN_WRFSBASE64,
30566 IX86_BUILTIN_WRGSBASE32,
30567 IX86_BUILTIN_WRGSBASE64,
30569 /* RDRND instructions. */
30570 IX86_BUILTIN_RDRAND16_STEP,
30571 IX86_BUILTIN_RDRAND32_STEP,
30572 IX86_BUILTIN_RDRAND64_STEP,
30574 /* RDSEED instructions. */
30575 IX86_BUILTIN_RDSEED16_STEP,
30576 IX86_BUILTIN_RDSEED32_STEP,
30577 IX86_BUILTIN_RDSEED64_STEP,
30579 /* F16C instructions. */
30580 IX86_BUILTIN_CVTPH2PS,
30581 IX86_BUILTIN_CVTPH2PS256,
30582 IX86_BUILTIN_CVTPS2PH,
30583 IX86_BUILTIN_CVTPS2PH256,
30585 /* CFString built-in for darwin */
30586 IX86_BUILTIN_CFSTRING,
30588 /* Builtins to get CPU type and supported features. */
30589 IX86_BUILTIN_CPU_INIT,
30590 IX86_BUILTIN_CPU_IS,
30591 IX86_BUILTIN_CPU_SUPPORTS,
30593 /* Read/write FLAGS register built-ins. */
30594 IX86_BUILTIN_READ_FLAGS,
30595 IX86_BUILTIN_WRITE_FLAGS,
30597 IX86_BUILTIN_MAX
30598 };
30600 /* Table for the ix86 builtin decls. */
30601 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30603 /* Table of all the builtin functions that are possible with different ISAs
30604 but are waiting to be built until a function is declared to use that
30605 ISA. */
30606 struct builtin_isa {
30607 const char *name; /* function name */
30608 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30609 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30610 bool const_p; /* true if the declaration is constant */
30611 bool leaf_p; /* true if the declaration has leaf attribute */
30612 bool nothrow_p; /* true if the declaration has nothrow attribute */
30613 bool set_and_not_built_p; /* true if the decl is deferred: recorded but not yet built */
30614 };
30616 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30618 /* Bits that can still enable the inclusion of a deferred builtin. */
30619 static HOST_WIDE_INT deferred_isa_values = 0;
30621 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30622 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30623 function decl in the ix86_builtins array. Returns the function decl or
30624 NULL_TREE, if the builtin was not added.
30626 If the front end has a special hook for builtin functions, delay adding
30627 builtin functions that aren't in the current ISA until the ISA is changed
30628 with function specific optimization. Doing so can save about 300K for the
30629 default compiler. When the builtin is expanded, check at that time whether
30630 it is valid.
30632 If the front end doesn't have a special hook, record all builtins, even if
30633 they aren't part of the current ISA, in case the user uses
30634 function specific options for a different ISA, so that we don't get scope
30635 errors if a builtin is added in the middle of a function scope. */
30637 static inline tree
30638 def_builtin (HOST_WIDE_INT mask, const char *name,
30639 enum ix86_builtin_func_type tcode,
30640 enum ix86_builtins code)
30641 {
30642 tree decl = NULL_TREE;
30644 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30645 {
30646 ix86_builtins_isa[(int) code].isa = mask;
30648 mask &= ~OPTION_MASK_ISA_64BIT;
30649 if (mask == 0
30650 || (mask & ix86_isa_flags) != 0
30651 || (lang_hooks.builtin_function
30652 == lang_hooks.builtin_function_ext_scope))
30654 {
30655 tree type = ix86_get_builtin_func_type (tcode);
30656 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30657 NULL, NULL_TREE);
30658 ix86_builtins[(int) code] = decl;
30659 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30660 }
30661 else
30662 {
30663 /* Just a MASK where set_and_not_built_p == true can potentially
30664 include a builtin. */
30665 deferred_isa_values |= mask;
30666 ix86_builtins[(int) code] = NULL_TREE;
30667 ix86_builtins_isa[(int) code].tcode = tcode;
30668 ix86_builtins_isa[(int) code].name = name;
30669 ix86_builtins_isa[(int) code].leaf_p = false;
30670 ix86_builtins_isa[(int) code].nothrow_p = false;
30671 ix86_builtins_isa[(int) code].const_p = false;
30672 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30673 }
30674 }
30676 return decl;
30677 }
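/* Illustrative sketch, not part of the original file: a typical registration
   through this interface ties an ISA mask to a builtin name, prototype and
   code, e.g.

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   The name, prototype and IX86_BUILTIN_EXAMPLE code above are hypothetical
   placeholders.  If the requested ISA is not enabled and the front end has a
   special builtin hook, no decl is built here; only the mask is recorded in
   ix86_builtins_isa and accumulated into deferred_isa_values, so that
   ix86_add_new_builtins can materialize the decl later.  */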
30679 /* Like def_builtin, but also marks the function decl "const". */
30681 static inline tree
30682 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30683 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30684 {
30685 tree decl = def_builtin (mask, name, tcode, code);
30686 if (decl)
30687 TREE_READONLY (decl) = 1;
30688 else
30689 ix86_builtins_isa[(int) code].const_p = true;
30691 return decl;
30692 }
30694 /* Add any new builtin functions for a given ISA that may not have been
30695 declared. This saves a bit of space compared to adding all of the
30696 declarations to the tree, even if we didn't use them. */
30698 static void
30699 ix86_add_new_builtins (HOST_WIDE_INT isa)
30700 {
30701 if ((isa & deferred_isa_values) == 0)
30702 return;
30704 /* Bits in ISA are handled now and can be removed from the deferred isa values. */
30705 deferred_isa_values &= ~isa;
30707 int i;
30708 tree saved_current_target_pragma = current_target_pragma;
30709 current_target_pragma = NULL_TREE;
30711 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30712 {
30713 if ((ix86_builtins_isa[i].isa & isa) != 0
30714 && ix86_builtins_isa[i].set_and_not_built_p)
30715 {
30716 tree decl, type;
30718 /* Don't define the builtin again. */
30719 ix86_builtins_isa[i].set_and_not_built_p = false;
30721 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30722 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30723 type, i, BUILT_IN_MD, NULL,
30724 NULL_TREE);
30726 ix86_builtins[i] = decl;
30727 if (ix86_builtins_isa[i].const_p)
30728 TREE_READONLY (decl) = 1;
30729 if (ix86_builtins_isa[i].leaf_p)
30730 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30731 NULL_TREE);
30732 if (ix86_builtins_isa[i].nothrow_p)
30733 TREE_NOTHROW (decl) = 1;
30734 }
30735 }
30737 current_target_pragma = saved_current_target_pragma;
30738 }
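/* Illustrative sketch, not part of the original file: a deferred builtin
   typically becomes available once a function opts into the extra ISA bits,
   for instance

     __attribute__ ((target ("avx512f")))
     void use_avx512f (void);

   Enabling additional ISA flags through a target attribute or pragma is what
   leads to the pending decls being added here; use_avx512f is a hypothetical
   placeholder name.  */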
30740 /* Bits for builtin_description.flag. */
30742 /* Set when we don't support the comparison natively, and should
30743 swap_comparison in order to support it. */
30744 #define BUILTIN_DESC_SWAP_OPERANDS 1
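/* Illustrative note, not part of the original file: when a table entry
   carries BUILTIN_DESC_SWAP_OPERANDS, a comparison with no direct machine
   pattern (say a LT b) can still be emitted by exchanging the two inputs and
   using the mirrored pattern (b GT a); the expander can check this flag and
   swap the operands before matching the insn.  */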
30746 struct builtin_description
30747 {
30748 const HOST_WIDE_INT mask;
30749 const enum insn_code icode;
30750 const char *const name;
30751 const enum ix86_builtins code;
30752 const enum rtx_code comparison;
30753 const int flag;
30754 };
30756 static const struct builtin_description bdesc_comi[] =
30757 {
30758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30770 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30782 };
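/* Illustrative note, not part of the original file: each row above binds an
   ISA mask and insn pattern to one user-visible builtin.  The first entry,
   for example, says that with -msse the function __builtin_ia32_comieq is
   expanded through CODE_FOR_sse_comi using the UNEQ comparison code.  */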
30784 static const struct builtin_description bdesc_pcmpestr[] =
30785 {
30786 /* SSE4.2 */
30787 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30788 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30789 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30794 };
30796 static const struct builtin_description bdesc_pcmpistr[] =
30797 {
30798 /* SSE4.2 */
30799 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30800 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30801 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30802 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30803 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30804 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30805 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30806 };
30808 /* Special builtins with variable number of arguments. */
30809 static const struct builtin_description bdesc_special_args[] =
30810 {
30811 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30812 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30813 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30815 /* 80387 (used internally for atomic compound assignment). */
30816 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30817 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30818 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30819 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30821 /* MMX */
30822 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30824 /* 3DNow! */
30825 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30827 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30828 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30829 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30830 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30831 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30832 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30833 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30834 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30835 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30837 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30838 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30839 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30840 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30841 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30842 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30843 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30844 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30846 /* SSE */
30847 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30848 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30849 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30854 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30856 /* SSE or 3DNow!A */
30857 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30858 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30860 /* SSE2 */
30861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30863 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30868 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30872 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30875 /* SSE3 */
30876 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30878 /* SSE4.1 */
30879 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30881 /* SSE4A */
30882 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30883 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30885 /* AVX */
30886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30889 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30890 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30891 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30916 /* AVX2 */
30917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30927 /* AVX512F */
30928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30976 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30977 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30978 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30979 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30980 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30981 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30983 /* FSGSBASE */
30984 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30985 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30986 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30987 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30988 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30989 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30990 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30991 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30993 /* RTM */
30994 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30995 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30996 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30998 /* AVX512BW */
30999 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31000 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31001 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31002 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31004 /* AVX512VL */
31005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31041 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31042 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31043 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31044 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31073 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31074 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31075 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31076 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31077 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31078 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31079 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31080 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31081 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31082 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31083 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31084 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31085 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31086 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31087 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31088 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
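/* The VOID_FTYPE_P*_*_QI signatures above describe masked store forms: the
   first operand is the destination pointer, the second the source vector,
   and the last a QImode write mask.  As a rough illustration (names taken
   from avx512vlintrin.h; treat the exact casts as an assumption), the
   down-converting store intrinsic expands to one of these builtins:

     extern __inline void
     _mm256_mask_cvtepi64_storeu_epi32 (void *__P, __mmask8 __M, __m256i __A)
     {
       __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
     }
*/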
31100 /* PCOMMIT. */
31101 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31102 };
31104 /* Builtins with variable number of arguments. */
31105 static const struct builtin_description bdesc_args[] =
31106 {
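/* Each entry lists, in order: the ISA option mask that must be enabled, the
   insn pattern used to expand the builtin, the user-visible builtin name,
   the ix86_builtins enumerator, an rtx comparison code (UNKNOWN when the
   builtin is not a comparison), and the function-type descriptor for its
   signature, cast to int for the flag field.  */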
31107 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31108 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31109 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31110 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31111 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31112 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31113 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31115 /* MMX */
31116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31119 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31123 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31132 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
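/* These MMX builtins back the intrinsics in mmintrin.h.  For example, a
   sketch of the usual wrapper (shown for illustration, not quoted from this
   file):

     extern __inline __m64
     _mm_add_pi8 (__m64 __m1, __m64 __m2)
     {
       return (__m64) __builtin_ia32_paddb ((__v8qi) __m1, (__v8qi) __m2);
     }

   Note the two shift signatures: the *_SI_COUNT forms take the count as an
   integer (psllwi and friends), while the *_V*_COUNT forms take it in the
   low part of a vector register (psllw and friends).  */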
31179 /* 3DNow! */
31180 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31181 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31182 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31185 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31186 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31187 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31188 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31189 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31190 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31191 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31192 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31193 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31194 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31195 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31196 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31201 /* 3DNow!A */
31202 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31203 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31204 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31205 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31206 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31207 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31209 /* SSE */
31210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31212 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31218 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31220 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31221 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31225 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31226 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
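/* For the compare builtins the fifth column supplies the rtx comparison
   code used when expanding the maskcmp and vmmaskcmp patterns.  Entries
   whose signature carries the _SWAP suffix have no direct hardware
   predicate and are emitted with the operands exchanged; e.g.
   __builtin_ia32_cmpgtps (a, b) is expanded as the LT comparison of
   (b, a), so it computes the same mask as __builtin_ia32_cmpltps (b, a).  */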
31255 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31256 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31260 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31262 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31263 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31265 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31270 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31271 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31275 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31277 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31281 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31283 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31284 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31286 /* SSE MMX or 3DNow!A */
31287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31288 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31289 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31291 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31293 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31296 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31297 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31299 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
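/* The "SSE MMX" builtins above implement the integer SSE extensions that
   operate on the 64-bit MMX registers; xmmintrin.h wraps them, roughly (an
   illustrative sketch of the wrapper, not a quote of this file):

     extern __inline __m64
     _mm_avg_pu8 (__m64 __A, __m64 __B)
     {
       return (__m64) __builtin_ia32_pavgb ((__v8qi) __A, (__v8qi) __B);
     }
*/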
31301 /* SSE2 */
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31320 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31321 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31327 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31358 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31371 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31376 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31381 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31394 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31423 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31429 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31438 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31443 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31444 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31445 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31446 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31447 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31448 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31451 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
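/* The V2DI_FTYPE_V2DI_INT_CONVERT signature on pslldqi128 and psrldqi128
   marks a whole-register byte shift whose count operand is converted to a
   TImode shift.  emmintrin.h passes the byte count scaled to bits, roughly
   (a sketch, assuming the usual macro definition):

     #define _mm_slli_si128(A, N) \
       ((__m128i) __builtin_ia32_pslldqi128 ((__v2di)(__m128i)(A), \
                                             (int)(N) * 8))
*/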
31463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31471 /* SSE2 MMX */
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31475 /* SSE3 */
31476 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31477 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31479 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31480 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31481 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31482 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31483 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31484 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31486 /* SSSE3 */
31487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31488 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31489 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31490 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31491 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31492 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31494 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31495 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31496 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31497 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31498 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31499 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31500 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31501 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31519 /* SSSE3. */
31520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31523 /* SSE4.1 */
31524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31527 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31528 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31529 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31530 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31531 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31532 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31533 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31535 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31536 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31537 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31538 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31562 /* SSE4.1 rounding and ptest builtins (gated on OPTION_MASK_ISA_ROUND). */
31563 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31564 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31565 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31566 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31568 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31569 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31570 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31571 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31573 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31574 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31576 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31577 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31587 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31588 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31591 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31592 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31594 /* SSE4.2 */
31595 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31596 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31597 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31598 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31599 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31601 /* SSE4A */
31602 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31603 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31604 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31605 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31607 /* AES */
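/* The name field is 0 for the AES entries here and the PCLMUL entry
   below; presumably because they also require the AES/PCLMUL ISA bits on
   top of SSE2, the user-visible builtins are declared elsewhere, and
   these rows only carry the expansion information.  */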
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31616 /* PCLMUL */
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31619 /* AVX */
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31675 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31695 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31708 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31732 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31733 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31734 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31755 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31756 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31758 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31760 /* AVX2 */
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
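/* In the shift entries below, the *_SI_COUNT and *_INT_COUNT prototypes
   take the shift count as a scalar integer, while the *_V8HI_COUNT,
   *_V4SI_COUNT and *_V2DI_COUNT forms take it in the low element of a
   vector operand.  The *_INT_CONVERT forms (pslldqi256/psrldqi256, like
   palignr above) operate on whole 128-bit lanes through the V2TI insn
   patterns, so their vector operands are presumably converted to and
   from that mode at expansion time.  */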
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31875 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
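/* LZCNT */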
31908 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31910 /* BMI */
31911 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31912 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31913 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31915 /* TBM */
31916 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31917 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31919 /* F16C */
31920 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31921 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31922 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31923 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31925 /* BMI2 */
31926 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31927 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31928 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31929 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31930 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31931 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31933 /* AVX512F */
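/* In the AVX-512 entries, the _mask builtins typically append a
   merge/source vector and a QI or HI mask operand to the prototype (the
   *_QI and *_HI suffixes in the FTYPE codes below), while the _maskz
   variants zero the masked-off elements instead of merging.  */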
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31955 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31956 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31989 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31990 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32100 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32101 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32102 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32103 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32135 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
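  /* In the floorpd/ceilpd rows above the comparison slot does not hold an
     RTL comparison: it carries a ROUND_FLOOR / ROUND_CEIL sub-code, and
     the *_ROUND function types route the call to the vec_pack_sfix
     rounding expansion path, which emits that sub-code as the
     rounding-mode immediate of the pattern.  */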
32140 /* Mask arithmetic operations */
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
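  /* The HI_FTYPE_* rows above implement the 16-bit opmask (__mmask16)
     operations; the _mm512_k* wrappers in avx512fintrin.h reduce to these
     builtins.  Illustrative use, assuming the wrapper names from that
     header:

	__mmask16 m = _mm512_kand (a, b);   -- expands via __builtin_ia32_kandhi
	__mmask16 n = _mm512_knot (m);      -- expands via __builtin_ia32_knothi  */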
32152 /* SHA */
32153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
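  /* The SHA rows carry a null name: the registration loop skips entries
     with name == 0, so these builtins are presumably declared separately
     under the SHA ISA mask, and the rows above only supply the insn_code
     and signature used at expansion time.  */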
32161 /* AVX512VL */
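  /* Rows in this group are tagged with the AVX512VL bit, plus the
     AVX512BW or AVX512DQ bit when the 128/256-bit operation comes from
     one of those subsets.  */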
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32172 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32173 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32174 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32175 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32200 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32201 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32202 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32203 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32204 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32205 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32206 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32208 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32209 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32210 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32211 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32217 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32218 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32219 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32220 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32221 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32222 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32223 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32224 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32225 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32226 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32229 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32230 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32231 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32232 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32253 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32256 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32257 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32258 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32259 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32260 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32266 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32267 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32268 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32269 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32270 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32271 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32272 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32273 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32274 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32275 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32278 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32279 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32288 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32289 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32292 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32293 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32296 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32297 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32300 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32301 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32302 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32303 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32304 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32305 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32306 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32308 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32309 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32320 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32321 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32322 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32323 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
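/* Masked integer add/subtract builtins (plain and saturating) follow.
   Each descriptor in this table lists, in order: the ISA option mask(s)
   required, the insn pattern (CODE_FOR_*), the user-visible builtin name,
   its IX86_BUILTIN_* code, an rtx comparison code (UNKNOWN for these
   non-comparison builtins), and the function prototype enum cast to int.  */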
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32327 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32334 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32335 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32340 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32341 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32342 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32343 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32346 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32347 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32348 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32349 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32350 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32351 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32354 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32355 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32356 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32357 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32362 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32363 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32364 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32400 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32426 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32427 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32428 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32429 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32430 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32431 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32432 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32433 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32434 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32435 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
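/* Masked shift builtins.  The "*i" forms take an immediate count (INT in
   the prototype); the others take the count from a vector operand.  Both
   variants expand through the same ashl/ashr/lshr patterns.  */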
32436 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32439 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32443 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32481 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
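/* Masked FMA builtins.  The _mask, _mask3 and _maskz suffixes select which
   operand supplies elements whose mask bit is clear: the first source, the
   third source (the addend), or zero, respectively.  */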
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32545 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32546 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32547 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32548 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32549 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32550 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32551 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32552 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32553 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32558 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32559 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32560 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32561 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32572 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32573 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32574 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32575 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32578 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32636 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32637 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32638 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32639 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32652 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32653 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32654 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32655 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32656 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32657 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32662 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32663 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32664 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32665 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32670 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32671 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32672 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32673 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32677 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32690 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32691 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32692 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32693 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32706 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32707 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32708 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32709 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32750 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32751 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32758 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32760 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32761 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32763 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32764 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32765 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32766 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32767 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32768 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32769 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32777 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32778 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32779 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32780 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32798 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32799 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32800 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32801 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32810 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32811 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32812 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32813 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32819 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32820 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32821 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32822 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32842 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32858 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32859 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32860 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32861 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32868 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32870 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32871 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
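  /* Each descriptor in this table lists, in order: the ISA option mask
     that must be enabled for the builtin to exist, the insn pattern used
     to expand it, the builtin's name, its IX86_BUILTIN_* code, an RTX
     comparison code (UNKNOWN for non-comparison builtins), and the
     function prototype enum, cast to int, that selects its signature.  */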
32875 /* AVX512DQ. */
32876 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32877 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32878 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32879 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32880 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32881 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32882 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32883 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32884 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32885 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32886 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32887 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32888 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32889 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32890 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32891 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32892 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32893 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32894 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32895 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32896 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32897 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32898 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32899 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32900 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32901 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32902 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32903 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32904 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32905 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32906 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32908 /* AVX512BW. */
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32969 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32970 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32971 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32972 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32973 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32974 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32975 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32976 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32977 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32978 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32979 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32980 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32981 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32982 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32983 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32984 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32985 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32986 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32987 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32988 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32989 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32990 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32991 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32992 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32993 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32994 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32995 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32996 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32997 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32998 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32999 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33001 /* AVX512IFMA */
33002 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33003 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33004 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33005 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33006 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33007 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33008 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33009 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33010 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33011 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33012 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33013 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33015 /* AVX512VBMI */
33016 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33017 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33018 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33019 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33020 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33021 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33022 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33023 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33024 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33025 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33026 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33027 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33028 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33029 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33030 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33031 };
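/* Reader's note (illustrative sketch, not part of the tables): each row in
   these bdesc_* arrays is a builtin_description initializer whose fields are,
   in order, the required ISA option mask, the insn_code of the expander
   pattern, the user-visible builtin name, the IX86_BUILTIN_* enumerator, an
   rtx comparison code (UNKNOWN when unused) and the function-type enumerator
   cast to int.  For instance the "__builtin_ia32_pabsb512_mask" row above is
   enabled by -mavx512bw, expands through CODE_FOR_absv64qi2_mask and has type
   V64QI_FTYPE_V64QI_V64QI_DI: a v64qi source, a v64qi pass-through operand
   and a 64-bit mask, returning v64qi.  How such a masked builtin is normally
   reached from user code, assuming the wrapper conventionally provided by
   avx512bwintrin.h (shown only as an illustration):

     #include <immintrin.h>

     __m512i
     abs_bytes_keeping_src (__m512i src, __mmask64 k, __m512i a)
     {
       // Typically expands to __builtin_ia32_pabsb512_mask; byte lanes
       // whose mask bit is clear keep the corresponding byte of SRC.
       return _mm512_mask_abs_epi8 (src, k, a);
     }
*/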
33033 /* Builtins with rounding support. */
33034 static const struct builtin_description bdesc_round_args[] =
33035 {
33036 /* AVX512F */
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33056 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33058 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33065 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33067 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33117 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33119 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33121 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33123 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33125 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33127 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33129 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33131 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33157 /* AVX512ER */
33158 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33159 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33160 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33161 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33162 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33163 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33164 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33165 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33166 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33167 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33169 /* AVX512DQ. */
33170 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33171 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33172 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33173 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33174 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33175 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33176 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33177 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33178 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33179 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33180 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33181 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33182 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33183 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33184 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33185 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33186 };
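/* Reader's note (illustrative sketch): in the bdesc_round_args table ending
   here, every function type carries a trailing INT operand holding the
   embedded rounding / suppress-all-exceptions control, e.g.
   V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT for "__builtin_ia32_addpd512_mask" is
   (src1, src2, pass-through, mask, rounding immediate).  The immediate must
   be a compile-time constant; _MM_FROUND_CUR_DIRECTION selects the MXCSR
   rounding mode instead.  A minimal sketch assuming the usual
   _mm512_*_round_* wrappers from avx512fintrin.h:

     #include <immintrin.h>

     __m512d
     add_round_to_nearest (__m512d a, __m512d b)
     {
       // Round to nearest with exceptions suppressed (SAE).
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }
*/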
33188 /* Builtins for MPX.  */
33189 static const struct builtin_description bdesc_mpx[] =
33190 {
33191 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33192 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33193 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33196 /* Const builtins for MPX. */
33197 static const struct builtin_description bdesc_mpx_const[] =
33198 {
33199 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33200 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33201 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33202 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33203 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33204 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33205 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33206 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
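/* Reader's note (illustrative sketch): the MPX descriptors above use
   (enum insn_code) 0 because they are expanded by dedicated cases in
   ix86_expand_builtin rather than through a named insn pattern.  They
   underlie the documented Pointer Bounds Checker builtins; a hedged example
   of typical use, assuming the documented __builtin___bnd_* interface and
   compilation with -fcheck-pointer-bounds -mmpx:

     #include <stddef.h>

     void *
     make_bounded (void *p, size_t n)
     {
       // Associate the bounds [p, p + n) with the returned pointer.
       return __builtin___bnd_set_ptr_bounds (p, n);
     }
*/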
33209 /* FMA4 and XOP. */
33210 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33211 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33212 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33213 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33214 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33215 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33216 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33217 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33218 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33219 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33220 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33221 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33222 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33223 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33224 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33225 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33226 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33227 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33228 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33229 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33230 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33231 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33232 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33233 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33234 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33235 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33236 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33237 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33238 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33239 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33240 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33241 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33242 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33243 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33244 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33245 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33246 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33247 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33248 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33249 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33250 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33251 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33252 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33253 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33254 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33255 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33256 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33257 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33258 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33259 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33260 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33261 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
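/* Reader's note (illustrative sketch): the MULTI_ARG_* macros above are just
   shorthand for ix86_builtin_func_type enumerators, so a row tagged
   MULTI_ARG_3_SF (= V4SF_FTYPE_V4SF_V4SF_V4SF) describes a builtin taking
   three v4sf operands and returning v4sf.  How the FMA4 entry
   "__builtin_ia32_vfmaddps" below is normally used, assuming the _mm_macc_ps
   wrapper conventionally provided by fma4intrin.h:

     #include <x86intrin.h>

     __m128
     fused_multiply_add (__m128 a, __m128 b, __m128 c)
     {
       // Computes a * b + c in one fused operation; requires -mfma4.
       return _mm_macc_ps (a, b, c);
     }
*/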
33263 static const struct builtin_description bdesc_multi_arg[] =
33264 {
33265 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33266 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33267 UNKNOWN, (int)MULTI_ARG_3_SF },
33268 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33269 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33270 UNKNOWN, (int)MULTI_ARG_3_DF },
33272 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33273 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33274 UNKNOWN, (int)MULTI_ARG_3_SF },
33275 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33276 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33277 UNKNOWN, (int)MULTI_ARG_3_DF },
33279 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33280 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33281 UNKNOWN, (int)MULTI_ARG_3_SF },
33282 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33283 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33284 UNKNOWN, (int)MULTI_ARG_3_DF },
33285 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33286 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33287 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33288 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33289 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33290 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33292 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33293 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33294 UNKNOWN, (int)MULTI_ARG_3_SF },
33295 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33296 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33297 UNKNOWN, (int)MULTI_ARG_3_DF },
33298 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33299 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33300 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33301 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33302 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33303 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33381 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33389 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33397 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33405 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33414 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33433 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
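/* Usage sketch (illustrative, not part of this table): the multi-argument
   builtins above normally surface through the corresponding intrinsics.
   Assuming the usual fma4intrin.h mapping of _mm_macc_ps onto
   __builtin_ia32_vfmaddps, a caller would write, e.g.:

       #include <x86intrin.h>           // compile with -mfma4

       __m128 mul_add (__m128 a, __m128 b, __m128 c)
       {
         return _mm_macc_ps (a, b, c);  // expands to __builtin_ia32_vfmaddps
       }
*/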
33465 /* TM vector builtins. */
33467 /* Reuse the existing x86-specific `struct builtin_description' because
33468 we're lazy.  Add casts to make the TM builtin codes fit. */
33469 static const struct builtin_description bdesc_tm[] =
33471 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33472 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33473 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33474 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33475 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33476 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33477 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33479 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33480 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33481 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33482 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33483 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33484 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33485 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33487 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33488 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33489 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33490 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33491 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33492 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33493 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33495 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33496 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33497 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
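/* Illustrative sketch (an assumption about typical use, not part of this
   table): with -fgnu-tm, a 128-bit vector store inside a transaction can be
   instrumented by the trans-mem passes into a call to one of the entries
   above, e.g. __builtin__ITM_WM128:

       #include <xmmintrin.h>

       __m128 g;

       void store_in_txn (__m128 v)
       {
         __transaction_atomic { g = v; }   // may become an _ITM_WM128 call
       }
*/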
33500 /* TM callbacks. */
33502 /* Return the builtin decl needed to load a vector of TYPE. */
33504 static tree
33505 ix86_builtin_tm_load (tree type)
33507 if (TREE_CODE (type) == VECTOR_TYPE)
33509 switch (tree_to_uhwi (TYPE_SIZE (type)))
33511 case 64:
33512 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33513 case 128:
33514 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33515 case 256:
33516 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33519 return NULL_TREE;
33522 /* Return the builtin decl needed to store a vector of TYPE. */
33524 static tree
33525 ix86_builtin_tm_store (tree type)
33527 if (TREE_CODE (type) == VECTOR_TYPE)
33529 switch (tree_to_uhwi (TYPE_SIZE (type)))
33531 case 64:
33532 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33533 case 128:
33534 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33535 case 256:
33536 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33539 return NULL_TREE;
33542 /* Initialize the transactional memory vector load/store builtins. */
33544 static void
33545 ix86_init_tm_builtins (void)
33547 enum ix86_builtin_func_type ftype;
33548 const struct builtin_description *d;
33549 size_t i;
33550 tree decl;
33551 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33552 tree attrs_log, attrs_type_log;
33554 if (!flag_tm)
33555 return;
33557 /* If there are no builtins defined, we must be compiling in a
33558 language without trans-mem support. */
33559 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33560 return;
33562 /* Use whatever attributes a normal TM load has. */
33563 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33564 attrs_load = DECL_ATTRIBUTES (decl);
33565 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33566 /* Use whatever attributes a normal TM store has. */
33567 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33568 attrs_store = DECL_ATTRIBUTES (decl);
33569 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33570 /* Use whatever attributes a normal TM log has. */
33571 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33572 attrs_log = DECL_ATTRIBUTES (decl);
33573 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33575 for (i = 0, d = bdesc_tm;
33576 i < ARRAY_SIZE (bdesc_tm);
33577 i++, d++)
33579 if ((d->mask & ix86_isa_flags) != 0
33580 || (lang_hooks.builtin_function
33581 == lang_hooks.builtin_function_ext_scope))
33583 tree type, attrs, attrs_type;
33584 enum built_in_function code = (enum built_in_function) d->code;
33586 ftype = (enum ix86_builtin_func_type) d->flag;
33587 type = ix86_get_builtin_func_type (ftype);
33589 if (BUILTIN_TM_LOAD_P (code))
33591 attrs = attrs_load;
33592 attrs_type = attrs_type_load;
33594 else if (BUILTIN_TM_STORE_P (code))
33596 attrs = attrs_store;
33597 attrs_type = attrs_type_store;
33599 else
33601 attrs = attrs_log;
33602 attrs_type = attrs_type_log;
33604 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33605 /* The builtin name without the "__builtin_" prefix,
33606 usable for calling it directly. */
33607 d->name + strlen ("__builtin_"),
33608 attrs);
33609 /* add_builtin_function () has already set the DECL_ATTRIBUTES; now
33610 set the TYPE_ATTRIBUTES as well. */
33611 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33613 set_builtin_decl (code, decl, false);
33618 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33619 not in the current target ISA, so that the user can compile particular
33620 modules with target-specific options that differ from the command-line
33621 options. */
33622 static void
33623 ix86_init_mmx_sse_builtins (void)
33625 const struct builtin_description * d;
33626 enum ix86_builtin_func_type ftype;
33627 size_t i;
33629 /* Add all special builtins with variable number of operands. */
33630 for (i = 0, d = bdesc_special_args;
33631 i < ARRAY_SIZE (bdesc_special_args);
33632 i++, d++)
33634 if (d->name == 0)
33635 continue;
33637 ftype = (enum ix86_builtin_func_type) d->flag;
33638 def_builtin (d->mask, d->name, ftype, d->code);
33641 /* Add all builtins with variable number of operands. */
33642 for (i = 0, d = bdesc_args;
33643 i < ARRAY_SIZE (bdesc_args);
33644 i++, d++)
33646 if (d->name == 0)
33647 continue;
33649 ftype = (enum ix86_builtin_func_type) d->flag;
33650 def_builtin_const (d->mask, d->name, ftype, d->code);
33653 /* Add all builtins with rounding. */
33654 for (i = 0, d = bdesc_round_args;
33655 i < ARRAY_SIZE (bdesc_round_args);
33656 i++, d++)
33658 if (d->name == 0)
33659 continue;
33661 ftype = (enum ix86_builtin_func_type) d->flag;
33662 def_builtin_const (d->mask, d->name, ftype, d->code);
33665 /* pcmpestr[im] insns. */
33666 for (i = 0, d = bdesc_pcmpestr;
33667 i < ARRAY_SIZE (bdesc_pcmpestr);
33668 i++, d++)
33670 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33671 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33672 else
33673 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33674 def_builtin_const (d->mask, d->name, ftype, d->code);
33677 /* pcmpistr[im] insns. */
33678 for (i = 0, d = bdesc_pcmpistr;
33679 i < ARRAY_SIZE (bdesc_pcmpistr);
33680 i++, d++)
33682 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33683 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33684 else
33685 ftype = INT_FTYPE_V16QI_V16QI_INT;
33686 def_builtin_const (d->mask, d->name, ftype, d->code);
33689 /* comi/ucomi insns. */
33690 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33692 if (d->mask == OPTION_MASK_ISA_SSE2)
33693 ftype = INT_FTYPE_V2DF_V2DF;
33694 else
33695 ftype = INT_FTYPE_V4SF_V4SF;
33696 def_builtin_const (d->mask, d->name, ftype, d->code);
33699 /* SSE */
33700 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33701 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33702 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33703 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33705 /* SSE or 3DNow!A */
33706 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33707 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33708 IX86_BUILTIN_MASKMOVQ);
33710 /* SSE2 */
33711 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33712 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33714 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33715 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33716 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33717 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33719 /* SSE3. */
33720 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33721 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33722 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33723 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33725 /* AES */
33726 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33727 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33728 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33729 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33730 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33731 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33732 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33733 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33734 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33735 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33736 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33737 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
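/* Usage sketch (illustrative): these builtins back the wmmintrin.h AES
   intrinsics; assuming the usual mapping of _mm_aesenc_si128 onto
   __builtin_ia32_aesenc128, one encryption round looks like:

       #include <wmmintrin.h>          // compile with -maes

       __m128i aes_round (__m128i state, __m128i round_key)
       {
         return _mm_aesenc_si128 (state, round_key);
       }
*/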
33739 /* PCLMUL */
33740 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33741 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33743 /* RDRND */
33744 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33745 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33746 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33747 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33748 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33749 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33750 IX86_BUILTIN_RDRAND64_STEP);
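/* Usage sketch (illustrative): per INT_FTYPE_PUNSIGNED above, the step
   builtins return nonzero on success and store the random value through
   the pointer argument (this is what the immintrin.h _rdrand32_step
   wrapper is assumed to expand to):

       unsigned int
       get_random (void)               // compile with -mrdrnd
       {
         unsigned int r = 0;
         while (!__builtin_ia32_rdrand32_step (&r))
           ;                           // retry until the hardware succeeds
         return r;
       }
*/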
33752 /* AVX2 */
33753 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33754 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33755 IX86_BUILTIN_GATHERSIV2DF);
33757 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33758 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33759 IX86_BUILTIN_GATHERSIV4DF);
33761 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33762 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33763 IX86_BUILTIN_GATHERDIV2DF);
33765 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33766 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33767 IX86_BUILTIN_GATHERDIV4DF);
33769 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33770 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33771 IX86_BUILTIN_GATHERSIV4SF);
33773 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33774 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33775 IX86_BUILTIN_GATHERSIV8SF);
33777 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33778 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33779 IX86_BUILTIN_GATHERDIV4SF);
33781 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33782 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33783 IX86_BUILTIN_GATHERDIV8SF);
33785 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33786 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33787 IX86_BUILTIN_GATHERSIV2DI);
33789 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33790 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33791 IX86_BUILTIN_GATHERSIV4DI);
33793 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33794 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33795 IX86_BUILTIN_GATHERDIV2DI);
33797 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33798 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33799 IX86_BUILTIN_GATHERDIV4DI);
33801 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33802 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33803 IX86_BUILTIN_GATHERSIV4SI);
33805 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33806 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33807 IX86_BUILTIN_GATHERSIV8SI);
33809 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33810 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33811 IX86_BUILTIN_GATHERDIV4SI);
33813 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33814 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33815 IX86_BUILTIN_GATHERDIV8SI);
33817 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33818 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33819 IX86_BUILTIN_GATHERALTSIV4DF);
33821 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33822 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33823 IX86_BUILTIN_GATHERALTDIV8SF);
33825 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33826 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33827 IX86_BUILTIN_GATHERALTSIV4DI);
33829 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33830 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33831 IX86_BUILTIN_GATHERALTDIV8SI);
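/* Usage sketch (illustrative): the gather builtins take (src, base, index,
   mask, scale) as encoded by the FTYPEs above; for example, with
   V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, a masked 2-element gather of
   doubles could be written as (assuming the avx2intrin.h-style vector
   typedefs):

       #include <immintrin.h>          // compile with -mavx2

       __m128d gather2 (const double *base, __m128i idx)
       {
         __m128d src  = _mm_setzero_pd ();
         __m128d mask = _mm_cmpeq_pd (src, src);   // all lanes enabled
         return (__m128d) __builtin_ia32_gathersiv2df ((__v2df) src, base,
                                                       (__v4si) idx,
                                                       (__v2df) mask, 8);
       }
*/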
33833 /* AVX512F */
33834 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33835 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33836 IX86_BUILTIN_GATHER3SIV16SF);
33838 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33839 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33840 IX86_BUILTIN_GATHER3SIV8DF);
33842 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33843 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33844 IX86_BUILTIN_GATHER3DIV16SF);
33846 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33847 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33848 IX86_BUILTIN_GATHER3DIV8DF);
33850 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33851 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33852 IX86_BUILTIN_GATHER3SIV16SI);
33854 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33855 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33856 IX86_BUILTIN_GATHER3SIV8DI);
33858 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33859 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33860 IX86_BUILTIN_GATHER3DIV16SI);
33862 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33863 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33864 IX86_BUILTIN_GATHER3DIV8DI);
33866 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33867 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33868 IX86_BUILTIN_GATHER3ALTSIV8DF);
33870 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33871 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33872 IX86_BUILTIN_GATHER3ALTDIV16SF);
33874 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33875 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33876 IX86_BUILTIN_GATHER3ALTSIV8DI);
33878 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33879 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33880 IX86_BUILTIN_GATHER3ALTDIV16SI);
33882 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33883 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33884 IX86_BUILTIN_SCATTERSIV16SF);
33886 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33887 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33888 IX86_BUILTIN_SCATTERSIV8DF);
33890 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33891 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33892 IX86_BUILTIN_SCATTERDIV16SF);
33894 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33895 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33896 IX86_BUILTIN_SCATTERDIV8DF);
33898 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33899 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33900 IX86_BUILTIN_SCATTERSIV16SI);
33902 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33903 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33904 IX86_BUILTIN_SCATTERSIV8DI);
33906 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33907 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33908 IX86_BUILTIN_SCATTERDIV16SI);
33910 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33911 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33912 IX86_BUILTIN_SCATTERDIV8DI);
33914 /* AVX512VL */
33915 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33916 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33917 IX86_BUILTIN_GATHER3SIV2DF);
33919 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33920 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33921 IX86_BUILTIN_GATHER3SIV4DF);
33923 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33924 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33925 IX86_BUILTIN_GATHER3DIV2DF);
33927 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33928 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33929 IX86_BUILTIN_GATHER3DIV4DF);
33931 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33932 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33933 IX86_BUILTIN_GATHER3SIV4SF);
33935 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33936 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33937 IX86_BUILTIN_GATHER3SIV8SF);
33939 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33940 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33941 IX86_BUILTIN_GATHER3DIV4SF);
33943 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33944 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33945 IX86_BUILTIN_GATHER3DIV8SF);
33947 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33948 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33949 IX86_BUILTIN_GATHER3SIV2DI);
33951 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33952 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33953 IX86_BUILTIN_GATHER3SIV4DI);
33955 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33956 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33957 IX86_BUILTIN_GATHER3DIV2DI);
33959 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33960 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33961 IX86_BUILTIN_GATHER3DIV4DI);
33963 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33964 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33965 IX86_BUILTIN_GATHER3SIV4SI);
33967 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33968 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33969 IX86_BUILTIN_GATHER3SIV8SI);
33971 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33972 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33973 IX86_BUILTIN_GATHER3DIV4SI);
33975 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33976 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33977 IX86_BUILTIN_GATHER3DIV8SI);
33979 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33980 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33981 IX86_BUILTIN_GATHER3ALTSIV4DF);
33983 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33984 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33985 IX86_BUILTIN_GATHER3ALTDIV8SF);
33987 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33988 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33989 IX86_BUILTIN_GATHER3ALTSIV4DI);
33991 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33992 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33993 IX86_BUILTIN_GATHER3ALTDIV8SI);
33995 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33996 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33997 IX86_BUILTIN_SCATTERSIV8SF);
33999 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34000 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34001 IX86_BUILTIN_SCATTERSIV4SF);
34003 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34004 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34005 IX86_BUILTIN_SCATTERSIV4DF);
34007 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34008 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34009 IX86_BUILTIN_SCATTERSIV2DF);
34011 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34012 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34013 IX86_BUILTIN_SCATTERDIV8SF);
34015 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34016 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34017 IX86_BUILTIN_SCATTERDIV4SF);
34019 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34020 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34021 IX86_BUILTIN_SCATTERDIV4DF);
34023 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34024 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34025 IX86_BUILTIN_SCATTERDIV2DF);
34027 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34028 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34029 IX86_BUILTIN_SCATTERSIV8SI);
34031 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34032 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34033 IX86_BUILTIN_SCATTERSIV4SI);
34035 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34036 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34037 IX86_BUILTIN_SCATTERSIV4DI);
34039 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34040 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34041 IX86_BUILTIN_SCATTERSIV2DI);
34043 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34044 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34045 IX86_BUILTIN_SCATTERDIV8SI);
34047 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34048 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34049 IX86_BUILTIN_SCATTERDIV4SI);
34051 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34052 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34053 IX86_BUILTIN_SCATTERDIV4DI);
34055 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34056 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34057 IX86_BUILTIN_SCATTERDIV2DI);
34059 /* AVX512PF */
34060 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34061 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34062 IX86_BUILTIN_GATHERPFDPD);
34063 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34064 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34065 IX86_BUILTIN_GATHERPFDPS);
34066 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34067 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34068 IX86_BUILTIN_GATHERPFQPD);
34069 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34070 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34071 IX86_BUILTIN_GATHERPFQPS);
34072 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34073 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34074 IX86_BUILTIN_SCATTERPFDPD);
34075 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34076 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34077 IX86_BUILTIN_SCATTERPFDPS);
34078 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34079 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34080 IX86_BUILTIN_SCATTERPFQPD);
34081 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34082 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34083 IX86_BUILTIN_SCATTERPFQPS);
34085 /* SHA */
34086 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34087 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34088 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34089 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34090 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34091 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34092 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34093 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34094 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34095 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34096 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34097 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34098 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34099 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34101 /* RTM. */
34102 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34103 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34105 /* MMX access to the vec_init patterns. */
34106 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34107 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34109 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34110 V4HI_FTYPE_HI_HI_HI_HI,
34111 IX86_BUILTIN_VEC_INIT_V4HI);
34113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34114 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34115 IX86_BUILTIN_VEC_INIT_V8QI);
34117 /* Access to the vec_extract patterns. */
34118 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34119 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34120 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34121 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34122 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34123 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34124 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34125 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34126 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34127 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34129 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34130 "__builtin_ia32_vec_ext_v4hi",
34131 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34133 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34134 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34137 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34139 /* Access to the vec_set patterns. */
34140 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34141 "__builtin_ia32_vec_set_v2di",
34142 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34144 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34145 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34147 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34148 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34150 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34151 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34153 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34154 "__builtin_ia32_vec_set_v4hi",
34155 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34157 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34158 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34160 /* RDSEED */
34161 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34162 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34163 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34164 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34165 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34166 "__builtin_ia32_rdseed_di_step",
34167 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34169 /* ADCX */
34170 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34171 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34172 def_builtin (OPTION_MASK_ISA_64BIT,
34173 "__builtin_ia32_addcarryx_u64",
34174 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34175 IX86_BUILTIN_ADDCARRYX64);
34177 /* SBB */
34178 def_builtin (0, "__builtin_ia32_sbb_u32",
34179 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34180 def_builtin (OPTION_MASK_ISA_64BIT,
34181 "__builtin_ia32_sbb_u64",
34182 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34183 IX86_BUILTIN_SBB64);
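/* Usage sketch (illustrative): per UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED,
   the carry builtins take an incoming carry, two operands, and a pointer
   for the sum, and return the outgoing carry, which makes multi-word
   addition straightforward:

       // add two 2-word numbers a[0..1] + b[0..1] into r[0..1]
       unsigned char
       add2 (const unsigned int *a, const unsigned int *b, unsigned int *r)
       {
         unsigned char c;
         c = __builtin_ia32_addcarryx_u32 (0, a[0], b[0], &r[0]);
         c = __builtin_ia32_addcarryx_u32 (c, a[1], b[1], &r[1]);
         return c;                      // final carry out
       }
*/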
34185 /* Read/write FLAGS. */
34186 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34187 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34188 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34189 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34190 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34191 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34192 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34193 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
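/* Usage sketch (illustrative): the read/write builtins simply move the
   flags register to and from an integer, e.g. to save and restore it
   around a code region on 64-bit targets:

       #ifdef __x86_64__
       void with_saved_flags (void (*fn) (void))
       {
         unsigned long long flags = __builtin_ia32_readeflags_u64 ();
         fn ();
         __builtin_ia32_writeeflags_u64 (flags);
       }
       #endif
*/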
34195 /* CLFLUSHOPT. */
34196 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34197 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34199 /* CLWB. */
34200 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34201 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34203 /* Add the FMA4 and XOP multi-argument builtins.  */
34204 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34206 if (d->name == 0)
34207 continue;
34209 ftype = (enum ix86_builtin_func_type) d->flag;
34210 def_builtin_const (d->mask, d->name, ftype, d->code);
34214 static void
34215 ix86_init_mpx_builtins ()
34217 const struct builtin_description * d;
34218 enum ix86_builtin_func_type ftype;
34219 tree decl;
34220 size_t i;
34222 for (i = 0, d = bdesc_mpx;
34223 i < ARRAY_SIZE (bdesc_mpx);
34224 i++, d++)
34226 if (d->name == 0)
34227 continue;
34229 ftype = (enum ix86_builtin_func_type) d->flag;
34230 decl = def_builtin (d->mask, d->name, ftype, d->code);
34232 /* Without the leaf and nothrow flags, abnormal edges
34233 may follow calls to MPX builtins when setjmp is
34234 present in the function.  Since there may be many
34235 MPX builtin calls, this creates lots of useless
34236 edges and enormous PHI nodes.  To avoid this, mark
34237 MPX builtins as leaf and nothrow. */
34238 if (decl)
34240 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34241 NULL_TREE);
34242 TREE_NOTHROW (decl) = 1;
34244 else
34246 ix86_builtins_isa[(int)d->code].leaf_p = true;
34247 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34251 for (i = 0, d = bdesc_mpx_const;
34252 i < ARRAY_SIZE (bdesc_mpx_const);
34253 i++, d++)
34255 if (d->name == 0)
34256 continue;
34258 ftype = (enum ix86_builtin_func_type) d->flag;
34259 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34261 if (decl)
34263 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34264 NULL_TREE);
34265 TREE_NOTHROW (decl) = 1;
34267 else
34269 ix86_builtins_isa[(int)d->code].leaf_p = true;
34270 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34275 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34276 to return a pointer to VERSION_DECL if the outcome of the expression
34277 formed by PREDICATE_CHAIN is true. This function will be called during
34278 version dispatch to decide which function version to execute. It returns
34279 the basic block at the end, to which more conditions can be added. */
34281 static basic_block
34282 add_condition_to_bb (tree function_decl, tree version_decl,
34283 tree predicate_chain, basic_block new_bb)
34285 gimple return_stmt;
34286 tree convert_expr, result_var;
34287 gimple convert_stmt;
34288 gimple call_cond_stmt;
34289 gimple if_else_stmt;
34291 basic_block bb1, bb2, bb3;
34292 edge e12, e23;
34294 tree cond_var, and_expr_var = NULL_TREE;
34295 gimple_seq gseq;
34297 tree predicate_decl, predicate_arg;
34299 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34301 gcc_assert (new_bb != NULL);
34302 gseq = bb_seq (new_bb);
34305 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34306 build_fold_addr_expr (version_decl));
34307 result_var = create_tmp_var (ptr_type_node);
34308 convert_stmt = gimple_build_assign (result_var, convert_expr);
34309 return_stmt = gimple_build_return (result_var);
34311 if (predicate_chain == NULL_TREE)
34313 gimple_seq_add_stmt (&gseq, convert_stmt);
34314 gimple_seq_add_stmt (&gseq, return_stmt);
34315 set_bb_seq (new_bb, gseq);
34316 gimple_set_bb (convert_stmt, new_bb);
34317 gimple_set_bb (return_stmt, new_bb);
34318 pop_cfun ();
34319 return new_bb;
34322 while (predicate_chain != NULL)
34324 cond_var = create_tmp_var (integer_type_node);
34325 predicate_decl = TREE_PURPOSE (predicate_chain);
34326 predicate_arg = TREE_VALUE (predicate_chain);
34327 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34328 gimple_call_set_lhs (call_cond_stmt, cond_var);
34330 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34331 gimple_set_bb (call_cond_stmt, new_bb);
34332 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34334 predicate_chain = TREE_CHAIN (predicate_chain);
34336 if (and_expr_var == NULL)
34337 and_expr_var = cond_var;
34338 else
34340 gimple assign_stmt;
34341 /* Use MIN_EXPR to check whether any of the integers is zero:
34342 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
34343 assign_stmt = gimple_build_assign (and_expr_var,
34344 build2 (MIN_EXPR, integer_type_node,
34345 cond_var, and_expr_var));
34347 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34348 gimple_set_bb (assign_stmt, new_bb);
34349 gimple_seq_add_stmt (&gseq, assign_stmt);
34353 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34354 integer_zero_node,
34355 NULL_TREE, NULL_TREE);
34356 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34357 gimple_set_bb (if_else_stmt, new_bb);
34358 gimple_seq_add_stmt (&gseq, if_else_stmt);
34360 gimple_seq_add_stmt (&gseq, convert_stmt);
34361 gimple_seq_add_stmt (&gseq, return_stmt);
34362 set_bb_seq (new_bb, gseq);
34364 bb1 = new_bb;
34365 e12 = split_block (bb1, if_else_stmt);
34366 bb2 = e12->dest;
34367 e12->flags &= ~EDGE_FALLTHRU;
34368 e12->flags |= EDGE_TRUE_VALUE;
34370 e23 = split_block (bb2, return_stmt);
34372 gimple_set_bb (convert_stmt, bb2);
34373 gimple_set_bb (return_stmt, bb2);
34375 bb3 = e23->dest;
34376 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34378 remove_edge (e23);
34379 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34381 pop_cfun ();
34383 return bb3;
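/* For reference, a sketch of what the statements built above amount to
   (pseudo-C, not literal GIMPLE) for a two-entry PREDICATE_CHAIN:

       c1  = P1 (arg1);
       c2  = P2 (arg2);
       acc = MIN (c2, c1);              // nonzero only if every predicate holds
       if (acc > 0)
         return (void *) &VERSION_DECL; // dispatch to this version
       // otherwise control falls through to the block this function
       // returns, where the next version's condition is appended.
*/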
34386 /* This parses the arguments of the "target" attribute on DECL and determines
34387 the right builtin to use to match the platform specification.
34388 It returns the priority value for this version decl.  If PREDICATE_LIST
34389 is not NULL, it stores the list of cpu features that need to be checked
34390 before dispatching this function. */
34392 static unsigned int
34393 get_builtin_code_for_version (tree decl, tree *predicate_list)
34395 tree attrs;
34396 struct cl_target_option cur_target;
34397 tree target_node;
34398 struct cl_target_option *new_target;
34399 const char *arg_str = NULL;
34400 const char *attrs_str = NULL;
34401 char *tok_str = NULL;
34402 char *token;
34404 /* Priority of i386 features; a greater value means a higher priority.  This
34405 is used to decide the order in which function dispatch must happen.  For
34406 instance, a version specialized for SSE4.2 should be checked for dispatch
34407 before a version for SSE3, as SSE4.2 implies SSE3. */
34408 enum feature_priority
34410 P_ZERO = 0,
34411 P_MMX,
34412 P_SSE,
34413 P_SSE2,
34414 P_SSE3,
34415 P_SSSE3,
34416 P_PROC_SSSE3,
34417 P_SSE4_A,
34418 P_PROC_SSE4_A,
34419 P_SSE4_1,
34420 P_SSE4_2,
34421 P_PROC_SSE4_2,
34422 P_POPCNT,
34423 P_AVX,
34424 P_PROC_AVX,
34425 P_BMI,
34426 P_PROC_BMI,
34427 P_FMA4,
34428 P_XOP,
34429 P_PROC_XOP,
34430 P_FMA,
34431 P_PROC_FMA,
34432 P_BMI2,
34433 P_AVX2,
34434 P_PROC_AVX2,
34435 P_AVX512F,
34436 P_PROC_AVX512F
34439 enum feature_priority priority = P_ZERO;
34441 /* These are the target attribute strings for which a dispatcher is
34442 available, from fold_builtin_cpu. */
34444 static struct _feature_list
34446 const char *const name;
34447 const enum feature_priority priority;
34449 const feature_list[] =
34451 {"mmx", P_MMX},
34452 {"sse", P_SSE},
34453 {"sse2", P_SSE2},
34454 {"sse3", P_SSE3},
34455 {"sse4a", P_SSE4_A},
34456 {"ssse3", P_SSSE3},
34457 {"sse4.1", P_SSE4_1},
34458 {"sse4.2", P_SSE4_2},
34459 {"popcnt", P_POPCNT},
34460 {"avx", P_AVX},
34461 {"bmi", P_BMI},
34462 {"fma4", P_FMA4},
34463 {"xop", P_XOP},
34464 {"fma", P_FMA},
34465 {"bmi2", P_BMI2},
34466 {"avx2", P_AVX2},
34467 {"avx512f", P_AVX512F}
34471 static unsigned int NUM_FEATURES
34472 = sizeof (feature_list) / sizeof (struct _feature_list);
34474 unsigned int i;
34476 tree predicate_chain = NULL_TREE;
34477 tree predicate_decl, predicate_arg;
34479 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34480 gcc_assert (attrs != NULL);
34482 attrs = TREE_VALUE (TREE_VALUE (attrs));
34484 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34485 attrs_str = TREE_STRING_POINTER (attrs);
34487 /* Return priority zero for default function. */
34488 if (strcmp (attrs_str, "default") == 0)
34489 return 0;
34491 /* Handle arch= if specified. For priority, set it to be 1 more than
34492 the best instruction set the processor can handle. For instance, if
34493 there is a version for atom and a version for ssse3 (the highest ISA
34494 priority for atom), the atom version must be checked for dispatch
34495 before the ssse3 version. */
34496 if (strstr (attrs_str, "arch=") != NULL)
34498 cl_target_option_save (&cur_target, &global_options);
34499 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34500 &global_options_set);
34502 gcc_assert (target_node);
34503 new_target = TREE_TARGET_OPTION (target_node);
34504 gcc_assert (new_target);
34506 if (new_target->arch_specified && new_target->arch > 0)
34508 switch (new_target->arch)
34510 case PROCESSOR_CORE2:
34511 arg_str = "core2";
34512 priority = P_PROC_SSSE3;
34513 break;
34514 case PROCESSOR_NEHALEM:
34515 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34516 arg_str = "westmere";
34517 else
34518 /* We translate "arch=corei7" and "arch=nehalem" to
34519 "corei7" so that it will be mapped to M_INTEL_COREI7
34520 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34521 arg_str = "corei7";
34522 priority = P_PROC_SSE4_2;
34523 break;
34524 case PROCESSOR_SANDYBRIDGE:
34525 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34526 arg_str = "ivybridge";
34527 else
34528 arg_str = "sandybridge";
34529 priority = P_PROC_AVX;
34530 break;
34531 case PROCESSOR_HASWELL:
34532 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34533 arg_str = "broadwell";
34534 else
34535 arg_str = "haswell";
34536 priority = P_PROC_AVX2;
34537 break;
34538 case PROCESSOR_BONNELL:
34539 arg_str = "bonnell";
34540 priority = P_PROC_SSSE3;
34541 break;
34542 case PROCESSOR_KNL:
34543 arg_str = "knl";
34544 priority = P_PROC_AVX512F;
34545 break;
34546 case PROCESSOR_SILVERMONT:
34547 arg_str = "silvermont";
34548 priority = P_PROC_SSE4_2;
34549 break;
34550 case PROCESSOR_AMDFAM10:
34551 arg_str = "amdfam10h";
34552 priority = P_PROC_SSE4_A;
34553 break;
34554 case PROCESSOR_BTVER1:
34555 arg_str = "btver1";
34556 priority = P_PROC_SSE4_A;
34557 break;
34558 case PROCESSOR_BTVER2:
34559 arg_str = "btver2";
34560 priority = P_PROC_BMI;
34561 break;
34562 case PROCESSOR_BDVER1:
34563 arg_str = "bdver1";
34564 priority = P_PROC_XOP;
34565 break;
34566 case PROCESSOR_BDVER2:
34567 arg_str = "bdver2";
34568 priority = P_PROC_FMA;
34569 break;
34570 case PROCESSOR_BDVER3:
34571 arg_str = "bdver3";
34572 priority = P_PROC_FMA;
34573 break;
34574 case PROCESSOR_BDVER4:
34575 arg_str = "bdver4";
34576 priority = P_PROC_AVX2;
34577 break;
34581 cl_target_option_restore (&global_options, &cur_target);
34583 if (predicate_list && arg_str == NULL)
34585 error_at (DECL_SOURCE_LOCATION (decl),
34586 "No dispatcher found for the versioning attributes");
34587 return 0;
34590 if (predicate_list)
34592 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34593 /* For a C string literal the length includes the trailing NULL. */
34594 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34595 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34596 predicate_chain);
34600 /* Process feature name. */
34601 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34602 strcpy (tok_str, attrs_str);
34603 token = strtok (tok_str, ",");
34604 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34606 while (token != NULL)
34608 /* Do not process "arch=". */
34609 if (strncmp (token, "arch=", 5) == 0)
34611 token = strtok (NULL, ",");
34612 continue;
34614 for (i = 0; i < NUM_FEATURES; ++i)
34616 if (strcmp (token, feature_list[i].name) == 0)
34618 if (predicate_list)
34620 predicate_arg = build_string_literal (
34621 strlen (feature_list[i].name) + 1,
34622 feature_list[i].name);
34623 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34624 predicate_chain);
34626 /* Find the maximum priority feature. */
34627 if (feature_list[i].priority > priority)
34628 priority = feature_list[i].priority;
34630 break;
34633 if (predicate_list && i == NUM_FEATURES)
34635 error_at (DECL_SOURCE_LOCATION (decl),
34636 "No dispatcher found for %s", token);
34637 return 0;
34639 token = strtok (NULL, ",");
34641 free (tok_str);
34643 if (predicate_list && predicate_chain == NULL_TREE)
34645 error_at (DECL_SOURCE_LOCATION (decl),
34646 "No dispatcher found for the versioning attributes : %s",
34647 attrs_str);
34648 return 0;
34650 else if (predicate_list)
34652 predicate_chain = nreverse (predicate_chain);
34653 *predicate_list = predicate_chain;
34656 return priority;
34659 /* This compares the priority of target features in function DECL1
34660 and DECL2. It returns positive value if DECL1 is higher priority,
34661 negative value if DECL2 is higher priority and 0 if they are the
34662 same. */
34664 static int
34665 ix86_compare_version_priority (tree decl1, tree decl2)
34667 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34668 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34670 return (int)priority1 - (int)priority2;
34673 /* V1 and V2 point to function versions with different priorities
34674 based on the target ISA. This function compares their priorities. */
34676 static int
34677 feature_compare (const void *v1, const void *v2)
34679 typedef struct _function_version_info
34681 tree version_decl;
34682 tree predicate_chain;
34683 unsigned int dispatch_priority;
34684 } function_version_info;
34686 const function_version_info c1 = *(const function_version_info *)v1;
34687 const function_version_info c2 = *(const function_version_info *)v2;
34688 return (c2.dispatch_priority - c1.dispatch_priority);
34691 /* This function generates the dispatch function for
34692 multi-versioned functions. DISPATCH_DECL is the function which will
34693 contain the dispatch logic. FNDECLS are the function choices for
34694 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34695 in DISPATCH_DECL in which the dispatch code is generated. */
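/* Editorial sketch of the dispatch code this routine builds, assuming the
   usual IFUNC scheme; foo_avx2, foo_sse42 and foo_default stand in for the
   versioned decls and are placeholders only:

     void *resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return foo_avx2;
       if (__builtin_cpu_supports ("sse4.2"))
         return foo_sse42;
       return foo_default;
     }

   The real conditions are the __builtin_cpu_is/__builtin_cpu_supports
   chains computed by get_builtin_code_for_version, attached to the body
   via add_condition_to_bb in descending priority order.  */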
34697 static int
34698 dispatch_function_versions (tree dispatch_decl,
34699 void *fndecls_p,
34700 basic_block *empty_bb)
34702 tree default_decl;
34703 gimple ifunc_cpu_init_stmt;
34704 gimple_seq gseq;
34705 int ix;
34706 tree ele;
34707 vec<tree> *fndecls;
34708 unsigned int num_versions = 0;
34709 unsigned int actual_versions = 0;
34710 unsigned int i;
34712 struct _function_version_info
34714 tree version_decl;
34715 tree predicate_chain;
34716 unsigned int dispatch_priority;
34717 }*function_version_info;
34719 gcc_assert (dispatch_decl != NULL
34720 && fndecls_p != NULL
34721 && empty_bb != NULL);
34723 /* fndecls_p is actually a vector. */
34724 fndecls = static_cast<vec<tree> *> (fndecls_p);
34726 /* At least one more version other than the default. */
34727 num_versions = fndecls->length ();
34728 gcc_assert (num_versions >= 2);
34730 function_version_info = (struct _function_version_info *)
34731 XNEWVEC (struct _function_version_info, (num_versions - 1));
34733 /* The first version in the vector is the default decl. */
34734 default_decl = (*fndecls)[0];
34736 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34738 gseq = bb_seq (*empty_bb);
34739 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34740 constructors, so explicitly call __builtin_cpu_init here. */
34741 ifunc_cpu_init_stmt = gimple_build_call_vec (
34742 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34743 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34744 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34745 set_bb_seq (*empty_bb, gseq);
34747 pop_cfun ();
34750 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34752 tree version_decl = ele;
34753 tree predicate_chain = NULL_TREE;
34754 unsigned int priority;
34755 /* Get attribute string, parse it and find the right predicate decl.
34756 The predicate function could be a lengthy combination of many
34757 features, like arch-type and various isa-variants. */
34758 priority = get_builtin_code_for_version (version_decl,
34759 &predicate_chain);
34761 if (predicate_chain == NULL_TREE)
34762 continue;
34764 function_version_info [actual_versions].version_decl = version_decl;
34765 function_version_info [actual_versions].predicate_chain
34766 = predicate_chain;
34767 function_version_info [actual_versions].dispatch_priority = priority;
34768 actual_versions++;
34771 /* Sort the versions according to descending order of dispatch priority. The
34772 priority is based on the ISA. This is not a perfect solution. There
34773 could still be ambiguity. If more than one function version is suitable
34774 to execute, which one should be dispatched? In future, allow the user
34775 to specify a dispatch priority next to the version. */
34776 qsort (function_version_info, actual_versions,
34777 sizeof (struct _function_version_info), feature_compare);
34779 for (i = 0; i < actual_versions; ++i)
34780 *empty_bb = add_condition_to_bb (dispatch_decl,
34781 function_version_info[i].version_decl,
34782 function_version_info[i].predicate_chain,
34783 *empty_bb);
34785 /* Dispatch the default version at the end. */
34786 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34787 NULL, *empty_bb);
34789 free (function_version_info);
34790 return 0;
34793 /* Comparator function to be used in qsort routine to sort attribute
34794 specification strings to "target". */
34796 static int
34797 attr_strcmp (const void *v1, const void *v2)
34799 const char *c1 = *(char *const*)v1;
34800 const char *c2 = *(char *const*)v2;
34801 return strcmp (c1, c2);
34804 /* ARGLIST is the argument to target attribute. This function tokenizes
34805 the comma separated arguments, sorts them and returns a string which
34806 is a unique identifier for the comma separated arguments. It also
34807 replaces non-identifier characters "=,-" with "_". */
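/* Worked example (editorial): for the argument string "sse4.2,arch=atom"
   the '=' becomes '_', the two tokens are sorted, and the result is
   "arch_atom_sse4.2"; this is the string the version mangling below
   appends to the assembler name.  */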
34809 static char *
34810 sorted_attr_string (tree arglist)
34812 tree arg;
34813 size_t str_len_sum = 0;
34814 char **args = NULL;
34815 char *attr_str, *ret_str;
34816 char *attr = NULL;
34817 unsigned int argnum = 1;
34818 unsigned int i;
34820 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34822 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34823 size_t len = strlen (str);
34824 str_len_sum += len + 1;
34825 if (arg != arglist)
34826 argnum++;
34827 for (i = 0; i < strlen (str); i++)
34828 if (str[i] == ',')
34829 argnum++;
34832 attr_str = XNEWVEC (char, str_len_sum);
34833 str_len_sum = 0;
34834 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34836 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34837 size_t len = strlen (str);
34838 memcpy (attr_str + str_len_sum, str, len);
34839 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34840 str_len_sum += len + 1;
34843 /* Replace "=,-" with "_". */
34844 for (i = 0; i < strlen (attr_str); i++)
34845 if (attr_str[i] == '=' || attr_str[i]== '-')
34846 attr_str[i] = '_';
34848 if (argnum == 1)
34849 return attr_str;
34851 args = XNEWVEC (char *, argnum);
34853 i = 0;
34854 attr = strtok (attr_str, ",");
34855 while (attr != NULL)
34857 args[i] = attr;
34858 i++;
34859 attr = strtok (NULL, ",");
34862 qsort (args, argnum, sizeof (char *), attr_strcmp);
34864 ret_str = XNEWVEC (char, str_len_sum);
34865 str_len_sum = 0;
34866 for (i = 0; i < argnum; i++)
34868 size_t len = strlen (args[i]);
34869 memcpy (ret_str + str_len_sum, args[i], len);
34870 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34871 str_len_sum += len + 1;
34874 XDELETEVEC (args);
34875 XDELETEVEC (attr_str);
34876 return ret_str;
34879 /* This function changes the assembler name for functions that are
34880 versions. If DECL is a function version and has a "target"
34881 attribute, it appends the attribute string to its assembler name. */
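/* For instance (editorial note), a version of foo declared with
   __attribute__ ((target ("avx"))) gets the assembler name "foo.avx",
   while the "default" version keeps its plain name; '.' is used as the
   separator because it is demangler friendly.  */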
34883 static tree
34884 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34886 tree version_attr;
34887 const char *orig_name, *version_string;
34888 char *attr_str, *assembler_name;
34890 if (DECL_DECLARED_INLINE_P (decl)
34891 && lookup_attribute ("gnu_inline",
34892 DECL_ATTRIBUTES (decl)))
34893 error_at (DECL_SOURCE_LOCATION (decl),
34894 "Function versions cannot be marked as gnu_inline,"
34895 " bodies have to be generated");
34897 if (DECL_VIRTUAL_P (decl)
34898 || DECL_VINDEX (decl))
34899 sorry ("Virtual function multiversioning not supported");
34901 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34903 /* The target attribute string cannot be NULL. */
34904 gcc_assert (version_attr != NULL_TREE);
34906 orig_name = IDENTIFIER_POINTER (id);
34907 version_string
34908 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34910 if (strcmp (version_string, "default") == 0)
34911 return id;
34913 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34914 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34916 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34918 /* Allow assembler name to be modified if already set. */
34919 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34920 SET_DECL_RTL (decl, NULL);
34922 tree ret = get_identifier (assembler_name);
34923 XDELETEVEC (attr_str);
34924 XDELETEVEC (assembler_name);
34925 return ret;
34928 /* This function returns true if FN1 and FN2 are versions of the same function,
34929 that is, the target strings of the function decls are different. This assumes
34930 that FN1 and FN2 have the same signature. */
34932 static bool
34933 ix86_function_versions (tree fn1, tree fn2)
34935 tree attr1, attr2;
34936 char *target1, *target2;
34937 bool result;
34939 if (TREE_CODE (fn1) != FUNCTION_DECL
34940 || TREE_CODE (fn2) != FUNCTION_DECL)
34941 return false;
34943 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34944 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34946 /* At least one function decl should have the target attribute specified. */
34947 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34948 return false;
34950 /* Diagnose missing target attribute if one of the decls is already
34951 multi-versioned. */
34952 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34954 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34956 if (attr2 != NULL_TREE)
34958 tree tem = fn1;
34959 fn1 = fn2;
34960 fn2 = tem;
34961 attr1 = attr2;
34963 error_at (DECL_SOURCE_LOCATION (fn2),
34964 "missing %<target%> attribute for multi-versioned %D",
34965 fn2);
34966 inform (DECL_SOURCE_LOCATION (fn1),
34967 "previous declaration of %D", fn1);
34968 /* Prevent diagnosing of the same error multiple times. */
34969 DECL_ATTRIBUTES (fn2)
34970 = tree_cons (get_identifier ("target"),
34971 copy_node (TREE_VALUE (attr1)),
34972 DECL_ATTRIBUTES (fn2));
34974 return false;
34977 target1 = sorted_attr_string (TREE_VALUE (attr1));
34978 target2 = sorted_attr_string (TREE_VALUE (attr2));
34980 /* The sorted target strings must be different for fn1 and fn2
34981 to be versions. */
34982 if (strcmp (target1, target2) == 0)
34983 result = false;
34984 else
34985 result = true;
34987 XDELETEVEC (target1);
34988 XDELETEVEC (target2);
34990 return result;
34993 static tree
34994 ix86_mangle_decl_assembler_name (tree decl, tree id)
34996 /* For function version, add the target suffix to the assembler name. */
34997 if (TREE_CODE (decl) == FUNCTION_DECL
34998 && DECL_FUNCTION_VERSIONED (decl))
34999 id = ix86_mangle_function_version_assembler_name (decl, id);
35000 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35001 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35002 #endif
35004 return id;
35007 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35008 is true, append the full path name of the source file. */
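/* Editorial example: make_name (decl, "ifunc", false) on a decl named
   "foo" yields "foo.ifunc"; with make_unique true a string derived from
   get_file_function_name is inserted in the middle, giving something of
   the form "foo.<unique>.ifunc" (the <unique> part is shown here only as
   a placeholder).  */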
35010 static char *
35011 make_name (tree decl, const char *suffix, bool make_unique)
35013 char *global_var_name;
35014 int name_len;
35015 const char *name;
35016 const char *unique_name = NULL;
35018 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35020 /* Get a unique name that can be used globally without any chances
35021 of collision at link time. */
35022 if (make_unique)
35023 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35025 name_len = strlen (name) + strlen (suffix) + 2;
35027 if (make_unique)
35028 name_len += strlen (unique_name) + 1;
35029 global_var_name = XNEWVEC (char, name_len);
35031 /* Use '.' to concatenate names as it is demangler friendly. */
35032 if (make_unique)
35033 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35034 suffix);
35035 else
35036 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35038 return global_var_name;
35041 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35043 /* Make a dispatcher declaration for the multi-versioned function DECL.
35044 Calls to DECL function will be replaced with calls to the dispatcher
35045 by the front-end. Return the decl created. */
35047 static tree
35048 make_dispatcher_decl (const tree decl)
35050 tree func_decl;
35051 char *func_name;
35052 tree fn_type, func_type;
35053 bool is_uniq = false;
35055 if (TREE_PUBLIC (decl) == 0)
35056 is_uniq = true;
35058 func_name = make_name (decl, "ifunc", is_uniq);
35060 fn_type = TREE_TYPE (decl);
35061 func_type = build_function_type (TREE_TYPE (fn_type),
35062 TYPE_ARG_TYPES (fn_type));
35064 func_decl = build_fn_decl (func_name, func_type);
35065 XDELETEVEC (func_name);
35066 TREE_USED (func_decl) = 1;
35067 DECL_CONTEXT (func_decl) = NULL_TREE;
35068 DECL_INITIAL (func_decl) = error_mark_node;
35069 DECL_ARTIFICIAL (func_decl) = 1;
35070 /* Mark this func as external; the resolver will flip it again if
35071 it gets generated. */
35072 DECL_EXTERNAL (func_decl) = 1;
35073 /* This will be an IFUNC, and IFUNCs have to be externally visible. */
35074 TREE_PUBLIC (func_decl) = 1;
35076 return func_decl;
35079 #endif
35081 /* Returns true if DECL is multi-versioned and is the default function,
35082 that is, it is not tagged with a target-specific optimization. */
35084 static bool
35085 is_function_default_version (const tree decl)
35087 if (TREE_CODE (decl) != FUNCTION_DECL
35088 || !DECL_FUNCTION_VERSIONED (decl))
35089 return false;
35090 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35091 gcc_assert (attr);
35092 attr = TREE_VALUE (TREE_VALUE (attr));
35093 return (TREE_CODE (attr) == STRING_CST
35094 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35097 /* Make a dispatcher declaration for the multi-versioned function DECL.
35098 Calls to DECL function will be replaced with calls to the dispatcher
35099 by the front-end. Returns the decl of the dispatcher function. */
35101 static tree
35102 ix86_get_function_versions_dispatcher (void *decl)
35104 tree fn = (tree) decl;
35105 struct cgraph_node *node = NULL;
35106 struct cgraph_node *default_node = NULL;
35107 struct cgraph_function_version_info *node_v = NULL;
35108 struct cgraph_function_version_info *first_v = NULL;
35110 tree dispatch_decl = NULL;
35112 struct cgraph_function_version_info *default_version_info = NULL;
35114 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35116 node = cgraph_node::get (fn);
35117 gcc_assert (node != NULL);
35119 node_v = node->function_version ();
35120 gcc_assert (node_v != NULL);
35122 if (node_v->dispatcher_resolver != NULL)
35123 return node_v->dispatcher_resolver;
35125 /* Find the default version and make it the first node. */
35126 first_v = node_v;
35127 /* Go to the beginning of the chain. */
35128 while (first_v->prev != NULL)
35129 first_v = first_v->prev;
35130 default_version_info = first_v;
35131 while (default_version_info != NULL)
35133 if (is_function_default_version
35134 (default_version_info->this_node->decl))
35135 break;
35136 default_version_info = default_version_info->next;
35139 /* If there is no default node, just return NULL. */
35140 if (default_version_info == NULL)
35141 return NULL;
35143 /* Make default info the first node. */
35144 if (first_v != default_version_info)
35146 default_version_info->prev->next = default_version_info->next;
35147 if (default_version_info->next)
35148 default_version_info->next->prev = default_version_info->prev;
35149 first_v->prev = default_version_info;
35150 default_version_info->next = first_v;
35151 default_version_info->prev = NULL;
35154 default_node = default_version_info->this_node;
35156 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35157 if (targetm.has_ifunc_p ())
35159 struct cgraph_function_version_info *it_v = NULL;
35160 struct cgraph_node *dispatcher_node = NULL;
35161 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35163 /* Right now, the dispatching is done via ifunc. */
35164 dispatch_decl = make_dispatcher_decl (default_node->decl);
35166 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35167 gcc_assert (dispatcher_node != NULL);
35168 dispatcher_node->dispatcher_function = 1;
35169 dispatcher_version_info
35170 = dispatcher_node->insert_new_function_version ();
35171 dispatcher_version_info->next = default_version_info;
35172 dispatcher_node->definition = 1;
35174 /* Set the dispatcher for all the versions. */
35175 it_v = default_version_info;
35176 while (it_v != NULL)
35178 it_v->dispatcher_resolver = dispatch_decl;
35179 it_v = it_v->next;
35182 else
35183 #endif
35185 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35186 "multiversioning needs ifunc which is not supported "
35187 "on this target");
35190 return dispatch_decl;
35193 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35194 it to CHAIN. */
35196 static tree
35197 make_attribute (const char *name, const char *arg_name, tree chain)
35199 tree attr_name;
35200 tree attr_arg_name;
35201 tree attr_args;
35202 tree attr;
35204 attr_name = get_identifier (name);
35205 attr_arg_name = build_string (strlen (arg_name), arg_name);
35206 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35207 attr = tree_cons (attr_name, attr_args, chain);
35208 return attr;
35211 /* Make the resolver function decl to dispatch the versions of
35212 a multi-versioned function, DEFAULT_DECL. Create an
35213 empty basic block in the resolver and store the pointer in
35214 EMPTY_BB. Return the decl of the resolver function. */
35216 static tree
35217 make_resolver_func (const tree default_decl,
35218 const tree dispatch_decl,
35219 basic_block *empty_bb)
35221 char *resolver_name;
35222 tree decl, type, decl_name, t;
35223 bool is_uniq = false;
35225 /* IFUNCs have to be globally visible. So, if the default_decl is
35226 not, then the name of the IFUNC should be made unique. */
35227 if (TREE_PUBLIC (default_decl) == 0)
35228 is_uniq = true;
35230 /* Append the filename to the resolver function if the versions are
35231 not externally visible. This is because the resolver function has
35232 to be externally visible for the loader to find it. So, appending
35233 the filename will prevent conflicts with a resolver function from
35234 another module which is based on the same version name. */
35235 resolver_name = make_name (default_decl, "resolver", is_uniq);
35237 /* The resolver function should return a (void *). */
35238 type = build_function_type_list (ptr_type_node, NULL_TREE);
35240 decl = build_fn_decl (resolver_name, type);
35241 decl_name = get_identifier (resolver_name);
35242 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35244 DECL_NAME (decl) = decl_name;
35245 TREE_USED (decl) = 1;
35246 DECL_ARTIFICIAL (decl) = 1;
35247 DECL_IGNORED_P (decl) = 0;
35248 /* IFUNC resolvers have to be externally visible. */
35249 TREE_PUBLIC (decl) = 1;
35250 DECL_UNINLINABLE (decl) = 1;
35252 /* Resolver is not external, body is generated. */
35253 DECL_EXTERNAL (decl) = 0;
35254 DECL_EXTERNAL (dispatch_decl) = 0;
35256 DECL_CONTEXT (decl) = NULL_TREE;
35257 DECL_INITIAL (decl) = make_node (BLOCK);
35258 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35260 if (DECL_COMDAT_GROUP (default_decl)
35261 || TREE_PUBLIC (default_decl))
35263 /* In this case, each translation unit with a call to this
35264 versioned function will put out a resolver. Ensure it
35265 is comdat to keep just one copy. */
35266 DECL_COMDAT (decl) = 1;
35267 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35269 /* Build result decl and add to function_decl. */
35270 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35271 DECL_ARTIFICIAL (t) = 1;
35272 DECL_IGNORED_P (t) = 1;
35273 DECL_RESULT (decl) = t;
35275 gimplify_function_tree (decl);
35276 push_cfun (DECL_STRUCT_FUNCTION (decl));
35277 *empty_bb = init_lowered_empty_function (decl, false, 0);
35279 cgraph_node::add_new_function (decl, true);
35280 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35282 pop_cfun ();
35284 gcc_assert (dispatch_decl != NULL);
35285 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35286 DECL_ATTRIBUTES (dispatch_decl)
35287 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35289 /* Create the alias for dispatch to resolver here. */
35290 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35291 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35292 XDELETEVEC (resolver_name);
35293 return decl;
35296 /* Generate the dispatching code body to dispatch multi-versioned function
35297 DECL. The target hook is called to process the "target" attributes and
35298 provide the code to dispatch the right function at run-time. NODE points
35299 to the dispatcher decl whose body will be created. */
35301 static tree
35302 ix86_generate_version_dispatcher_body (void *node_p)
35304 tree resolver_decl;
35305 basic_block empty_bb;
35306 tree default_ver_decl;
35307 struct cgraph_node *versn;
35308 struct cgraph_node *node;
35310 struct cgraph_function_version_info *node_version_info = NULL;
35311 struct cgraph_function_version_info *versn_info = NULL;
35313 node = (cgraph_node *)node_p;
35315 node_version_info = node->function_version ();
35316 gcc_assert (node->dispatcher_function
35317 && node_version_info != NULL);
35319 if (node_version_info->dispatcher_resolver)
35320 return node_version_info->dispatcher_resolver;
35322 /* The first version in the chain corresponds to the default version. */
35323 default_ver_decl = node_version_info->next->this_node->decl;
35325 /* node is going to be an alias, so remove the finalized bit. */
35326 node->definition = false;
35328 resolver_decl = make_resolver_func (default_ver_decl,
35329 node->decl, &empty_bb);
35331 node_version_info->dispatcher_resolver = resolver_decl;
35333 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35335 auto_vec<tree, 2> fn_ver_vec;
35337 for (versn_info = node_version_info->next; versn_info;
35338 versn_info = versn_info->next)
35340 versn = versn_info->this_node;
35341 /* Check for virtual functions here again, as by this time it should
35342 have been determined if this function needs a vtable index or
35343 not. This happens for methods in derived classes that override
35344 virtual methods in base classes but are not explicitly marked as
35345 virtual. */
35346 if (DECL_VINDEX (versn->decl))
35347 sorry ("Virtual function multiversioning not supported");
35349 fn_ver_vec.safe_push (versn->decl);
35352 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35353 cgraph_edge::rebuild_edges ();
35354 pop_cfun ();
35355 return resolver_decl;
35357 /* This builds the processor_model struct type defined in
35358 libgcc/config/i386/cpuinfo.c. */
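/* Rough C-level shape of the record built below (editorial sketch,
   mirroring the field names used in this function and in cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */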
35360 static tree
35361 build_processor_model_struct (void)
35363 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35364 "__cpu_features"};
35365 tree field = NULL_TREE, field_chain = NULL_TREE;
35366 int i;
35367 tree type = make_node (RECORD_TYPE);
35369 /* The first 3 fields are unsigned int. */
35370 for (i = 0; i < 3; ++i)
35372 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35373 get_identifier (field_name[i]), unsigned_type_node);
35374 if (field_chain != NULL_TREE)
35375 DECL_CHAIN (field) = field_chain;
35376 field_chain = field;
35379 /* The last field is an array of unsigned integers of size one. */
35380 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35381 get_identifier (field_name[3]),
35382 build_array_type (unsigned_type_node,
35383 build_index_type (size_one_node)));
35384 if (field_chain != NULL_TREE)
35385 DECL_CHAIN (field) = field_chain;
35386 field_chain = field;
35388 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35389 return type;
35392 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35394 static tree
35395 make_var_decl (tree type, const char *name)
35397 tree new_decl;
35399 new_decl = build_decl (UNKNOWN_LOCATION,
35400 VAR_DECL,
35401 get_identifier(name),
35402 type);
35404 DECL_EXTERNAL (new_decl) = 1;
35405 TREE_STATIC (new_decl) = 1;
35406 TREE_PUBLIC (new_decl) = 1;
35407 DECL_INITIAL (new_decl) = 0;
35408 DECL_ARTIFICIAL (new_decl) = 0;
35409 DECL_PRESERVE_P (new_decl) = 1;
35411 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35412 assemble_variable (new_decl, 0, 0, 0);
35414 return new_decl;
35417 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35418 into an integer defined in libgcc/config/i386/cpuinfo.c. */
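/* Editorial illustration of the folding done below: a call such as
   __builtin_cpu_is ("amd") becomes, roughly,

     (int) (__cpu_model.__cpu_vendor == M_AMD)

   and __builtin_cpu_supports ("avx2") becomes

     (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   using the vendor/type/subtype fields and the feature bits enumerated
   just after this comment.  */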
35420 static tree
35421 fold_builtin_cpu (tree fndecl, tree *args)
35423 unsigned int i;
35424 enum ix86_builtins fn_code = (enum ix86_builtins)
35425 DECL_FUNCTION_CODE (fndecl);
35426 tree param_string_cst = NULL;
35428 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35429 enum processor_features
35431 F_CMOV = 0,
35432 F_MMX,
35433 F_POPCNT,
35434 F_SSE,
35435 F_SSE2,
35436 F_SSE3,
35437 F_SSSE3,
35438 F_SSE4_1,
35439 F_SSE4_2,
35440 F_AVX,
35441 F_AVX2,
35442 F_SSE4_A,
35443 F_FMA4,
35444 F_XOP,
35445 F_FMA,
35446 F_AVX512F,
35447 F_BMI,
35448 F_BMI2,
35449 F_MAX
35452 /* These are the values for vendor types and cpu types and subtypes
35453 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35454 the corresponding start value. */
35455 enum processor_model
35457 M_INTEL = 1,
35458 M_AMD,
35459 M_CPU_TYPE_START,
35460 M_INTEL_BONNELL,
35461 M_INTEL_CORE2,
35462 M_INTEL_COREI7,
35463 M_AMDFAM10H,
35464 M_AMDFAM15H,
35465 M_INTEL_SILVERMONT,
35466 M_INTEL_KNL,
35467 M_AMD_BTVER1,
35468 M_AMD_BTVER2,
35469 M_CPU_SUBTYPE_START,
35470 M_INTEL_COREI7_NEHALEM,
35471 M_INTEL_COREI7_WESTMERE,
35472 M_INTEL_COREI7_SANDYBRIDGE,
35473 M_AMDFAM10H_BARCELONA,
35474 M_AMDFAM10H_SHANGHAI,
35475 M_AMDFAM10H_ISTANBUL,
35476 M_AMDFAM15H_BDVER1,
35477 M_AMDFAM15H_BDVER2,
35478 M_AMDFAM15H_BDVER3,
35479 M_AMDFAM15H_BDVER4,
35480 M_INTEL_COREI7_IVYBRIDGE,
35481 M_INTEL_COREI7_HASWELL,
35482 M_INTEL_COREI7_BROADWELL
35485 static struct _arch_names_table
35487 const char *const name;
35488 const enum processor_model model;
35490 const arch_names_table[] =
35492 {"amd", M_AMD},
35493 {"intel", M_INTEL},
35494 {"atom", M_INTEL_BONNELL},
35495 {"slm", M_INTEL_SILVERMONT},
35496 {"core2", M_INTEL_CORE2},
35497 {"corei7", M_INTEL_COREI7},
35498 {"nehalem", M_INTEL_COREI7_NEHALEM},
35499 {"westmere", M_INTEL_COREI7_WESTMERE},
35500 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35501 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35502 {"haswell", M_INTEL_COREI7_HASWELL},
35503 {"broadwell", M_INTEL_COREI7_BROADWELL},
35504 {"bonnell", M_INTEL_BONNELL},
35505 {"silvermont", M_INTEL_SILVERMONT},
35506 {"knl", M_INTEL_KNL},
35507 {"amdfam10h", M_AMDFAM10H},
35508 {"barcelona", M_AMDFAM10H_BARCELONA},
35509 {"shanghai", M_AMDFAM10H_SHANGHAI},
35510 {"istanbul", M_AMDFAM10H_ISTANBUL},
35511 {"btver1", M_AMD_BTVER1},
35512 {"amdfam15h", M_AMDFAM15H},
35513 {"bdver1", M_AMDFAM15H_BDVER1},
35514 {"bdver2", M_AMDFAM15H_BDVER2},
35515 {"bdver3", M_AMDFAM15H_BDVER3},
35516 {"bdver4", M_AMDFAM15H_BDVER4},
35517 {"btver2", M_AMD_BTVER2},
35520 static struct _isa_names_table
35522 const char *const name;
35523 const enum processor_features feature;
35525 const isa_names_table[] =
35527 {"cmov", F_CMOV},
35528 {"mmx", F_MMX},
35529 {"popcnt", F_POPCNT},
35530 {"sse", F_SSE},
35531 {"sse2", F_SSE2},
35532 {"sse3", F_SSE3},
35533 {"ssse3", F_SSSE3},
35534 {"sse4a", F_SSE4_A},
35535 {"sse4.1", F_SSE4_1},
35536 {"sse4.2", F_SSE4_2},
35537 {"avx", F_AVX},
35538 {"fma4", F_FMA4},
35539 {"xop", F_XOP},
35540 {"fma", F_FMA},
35541 {"avx2", F_AVX2},
35542 {"avx512f",F_AVX512F},
35543 {"bmi", F_BMI},
35544 {"bmi2", F_BMI2}
35547 tree __processor_model_type = build_processor_model_struct ();
35548 tree __cpu_model_var = make_var_decl (__processor_model_type,
35549 "__cpu_model");
35552 varpool_node::add (__cpu_model_var);
35554 gcc_assert ((args != NULL) && (*args != NULL));
35556 param_string_cst = *args;
35557 while (param_string_cst
35558 && TREE_CODE (param_string_cst) != STRING_CST)
35560 /* *args must be an expr that can contain other EXPRs leading to a
35561 STRING_CST. */
35562 if (!EXPR_P (param_string_cst))
35564 error ("Parameter to builtin must be a string constant or literal");
35565 return integer_zero_node;
35567 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35570 gcc_assert (param_string_cst);
35572 if (fn_code == IX86_BUILTIN_CPU_IS)
35574 tree ref;
35575 tree field;
35576 tree final;
35578 unsigned int field_val = 0;
35579 unsigned int NUM_ARCH_NAMES
35580 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35582 for (i = 0; i < NUM_ARCH_NAMES; i++)
35583 if (strcmp (arch_names_table[i].name,
35584 TREE_STRING_POINTER (param_string_cst)) == 0)
35585 break;
35587 if (i == NUM_ARCH_NAMES)
35589 error ("Parameter to builtin not valid: %s",
35590 TREE_STRING_POINTER (param_string_cst));
35591 return integer_zero_node;
35594 field = TYPE_FIELDS (__processor_model_type);
35595 field_val = arch_names_table[i].model;
35597 /* CPU types are stored in the next field. */
35598 if (field_val > M_CPU_TYPE_START
35599 && field_val < M_CPU_SUBTYPE_START)
35601 field = DECL_CHAIN (field);
35602 field_val -= M_CPU_TYPE_START;
35605 /* CPU subtypes are stored in the next field. */
35606 if (field_val > M_CPU_SUBTYPE_START)
35608 field = DECL_CHAIN ( DECL_CHAIN (field));
35609 field_val -= M_CPU_SUBTYPE_START;
35612 /* Get the appropriate field in __cpu_model. */
35613 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35614 field, NULL_TREE);
35616 /* Check the value. */
35617 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35618 build_int_cstu (unsigned_type_node, field_val));
35619 return build1 (CONVERT_EXPR, integer_type_node, final);
35621 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35623 tree ref;
35624 tree array_elt;
35625 tree field;
35626 tree final;
35628 unsigned int field_val = 0;
35629 unsigned int NUM_ISA_NAMES
35630 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35632 for (i = 0; i < NUM_ISA_NAMES; i++)
35633 if (strcmp (isa_names_table[i].name,
35634 TREE_STRING_POINTER (param_string_cst)) == 0)
35635 break;
35637 if (i == NUM_ISA_NAMES)
35639 error ("Parameter to builtin not valid: %s",
35640 TREE_STRING_POINTER (param_string_cst));
35641 return integer_zero_node;
35644 field = TYPE_FIELDS (__processor_model_type);
35645 /* Get the last field, which is __cpu_features. */
35646 while (DECL_CHAIN (field))
35647 field = DECL_CHAIN (field);
35649 /* Get the appropriate field: __cpu_model.__cpu_features */
35650 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35651 field, NULL_TREE);
35653 /* Access the 0th element of __cpu_features array. */
35654 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35655 integer_zero_node, NULL_TREE, NULL_TREE);
35657 field_val = (1 << isa_names_table[i].feature);
35658 /* Return __cpu_model.__cpu_features[0] & field_val */
35659 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35660 build_int_cstu (unsigned_type_node, field_val));
35661 return build1 (CONVERT_EXPR, integer_type_node, final);
35663 gcc_unreachable ();
35666 static tree
35667 ix86_fold_builtin (tree fndecl, int n_args,
35668 tree *args, bool ignore ATTRIBUTE_UNUSED)
35670 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35672 enum ix86_builtins fn_code = (enum ix86_builtins)
35673 DECL_FUNCTION_CODE (fndecl);
35674 if (fn_code == IX86_BUILTIN_CPU_IS
35675 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35677 gcc_assert (n_args == 1);
35678 return fold_builtin_cpu (fndecl, args);
35682 #ifdef SUBTARGET_FOLD_BUILTIN
35683 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35684 #endif
35686 return NULL_TREE;
35689 /* Make builtins to detect cpu type and features supported. NAME is
35690 the builtin name, CODE is the builtin code, and FTYPE is the function
35691 type of the builtin. */
35693 static void
35694 make_cpu_type_builtin (const char* name, int code,
35695 enum ix86_builtin_func_type ftype, bool is_const)
35697 tree decl;
35698 tree type;
35700 type = ix86_get_builtin_func_type (ftype);
35701 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35702 NULL, NULL_TREE);
35703 gcc_assert (decl != NULL_TREE);
35704 ix86_builtins[(int) code] = decl;
35705 TREE_READONLY (decl) = is_const;
35708 /* Make builtins to get CPU type and features supported. The created
35709 builtins are:
35711 __builtin_cpu_init (), to detect cpu type and features,
35712 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35713 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
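/* Typical user-level use of these builtins (editorial example;
   use_sse42_path and use_generic_path are hypothetical helpers):

     if (__builtin_cpu_supports ("sse4.2"))
       use_sse42_path ();
     else
       use_generic_path ();

   An explicit __builtin_cpu_init () call is only needed in code that can
   run before the constructors, such as IFUNC resolvers, as noted in
   dispatch_function_versions above.  */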
35716 static void
35717 ix86_init_platform_type_builtins (void)
35719 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35720 INT_FTYPE_VOID, false);
35721 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35722 INT_FTYPE_PCCHAR, true);
35723 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35724 INT_FTYPE_PCCHAR, true);
35727 /* Internal method for ix86_init_builtins. */
35729 static void
35730 ix86_init_builtins_va_builtins_abi (void)
35732 tree ms_va_ref, sysv_va_ref;
35733 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35734 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35735 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35736 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35738 if (!TARGET_64BIT)
35739 return;
35740 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35741 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35742 ms_va_ref = build_reference_type (ms_va_list_type_node);
35743 sysv_va_ref =
35744 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35746 fnvoid_va_end_ms =
35747 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35748 fnvoid_va_start_ms =
35749 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35750 fnvoid_va_end_sysv =
35751 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35752 fnvoid_va_start_sysv =
35753 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35754 NULL_TREE);
35755 fnvoid_va_copy_ms =
35756 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35757 NULL_TREE);
35758 fnvoid_va_copy_sysv =
35759 build_function_type_list (void_type_node, sysv_va_ref,
35760 sysv_va_ref, NULL_TREE);
35762 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35763 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35764 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35765 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35766 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35767 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35768 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35769 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35770 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35771 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35772 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35773 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35776 static void
35777 ix86_init_builtin_types (void)
35779 tree float128_type_node, float80_type_node;
35781 /* The __float80 type. */
35782 float80_type_node = long_double_type_node;
35783 if (TYPE_MODE (float80_type_node) != XFmode)
35785 /* The __float80 type. */
35786 float80_type_node = make_node (REAL_TYPE);
35788 TYPE_PRECISION (float80_type_node) = 80;
35789 layout_type (float80_type_node);
35791 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35793 /* The __float128 type. */
35794 float128_type_node = make_node (REAL_TYPE);
35795 TYPE_PRECISION (float128_type_node) = 128;
35796 layout_type (float128_type_node);
35797 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35799 /* This macro is built by i386-builtin-types.awk. */
35800 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35803 static void
35804 ix86_init_builtins (void)
35806 tree t;
35808 ix86_init_builtin_types ();
35810 /* Builtins to get CPU type and features. */
35811 ix86_init_platform_type_builtins ();
35813 /* TFmode support builtins. */
35814 def_builtin_const (0, "__builtin_infq",
35815 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35816 def_builtin_const (0, "__builtin_huge_valq",
35817 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35819 /* We will expand them to normal call if SSE isn't available since
35820 they are used by libgcc. */
35821 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35822 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35823 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35824 TREE_READONLY (t) = 1;
35825 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35827 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35828 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35829 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35830 TREE_READONLY (t) = 1;
35831 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35833 ix86_init_tm_builtins ();
35834 ix86_init_mmx_sse_builtins ();
35835 ix86_init_mpx_builtins ();
35837 if (TARGET_LP64)
35838 ix86_init_builtins_va_builtins_abi ();
35840 #ifdef SUBTARGET_INIT_BUILTINS
35841 SUBTARGET_INIT_BUILTINS;
35842 #endif
35845 /* Return the ix86 builtin for CODE. */
35847 static tree
35848 ix86_builtin_decl (unsigned code, bool)
35850 if (code >= IX86_BUILTIN_MAX)
35851 return error_mark_node;
35853 return ix86_builtins[code];
35856 /* Errors in the source file can cause expand_expr to return const0_rtx
35857 where we expect a vector. To avoid crashing, use one of the vector
35858 clear instructions. */
35859 static rtx
35860 safe_vector_operand (rtx x, machine_mode mode)
35862 if (x == const0_rtx)
35863 x = CONST0_RTX (mode);
35864 return x;
35867 /* Fixup modeless constants to fit required mode. */
35868 static rtx
35869 fixup_modeless_constant (rtx x, machine_mode mode)
35871 if (GET_MODE (x) == VOIDmode)
35872 x = convert_to_mode (mode, x, 1);
35873 return x;
35876 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35878 static rtx
35879 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35881 rtx pat;
35882 tree arg0 = CALL_EXPR_ARG (exp, 0);
35883 tree arg1 = CALL_EXPR_ARG (exp, 1);
35884 rtx op0 = expand_normal (arg0);
35885 rtx op1 = expand_normal (arg1);
35886 machine_mode tmode = insn_data[icode].operand[0].mode;
35887 machine_mode mode0 = insn_data[icode].operand[1].mode;
35888 machine_mode mode1 = insn_data[icode].operand[2].mode;
35890 if (VECTOR_MODE_P (mode0))
35891 op0 = safe_vector_operand (op0, mode0);
35892 if (VECTOR_MODE_P (mode1))
35893 op1 = safe_vector_operand (op1, mode1);
35895 if (optimize || !target
35896 || GET_MODE (target) != tmode
35897 || !insn_data[icode].operand[0].predicate (target, tmode))
35898 target = gen_reg_rtx (tmode);
35900 if (GET_MODE (op1) == SImode && mode1 == TImode)
35902 rtx x = gen_reg_rtx (V4SImode);
35903 emit_insn (gen_sse2_loadd (x, op1));
35904 op1 = gen_lowpart (TImode, x);
35907 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35908 op0 = copy_to_mode_reg (mode0, op0);
35909 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35910 op1 = copy_to_mode_reg (mode1, op1);
35912 pat = GEN_FCN (icode) (target, op0, op1);
35913 if (! pat)
35914 return 0;
35916 emit_insn (pat);
35918 return target;
35921 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35923 static rtx
35924 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35925 enum ix86_builtin_func_type m_type,
35926 enum rtx_code sub_code)
35928 rtx pat;
35929 int i;
35930 int nargs;
35931 bool comparison_p = false;
35932 bool tf_p = false;
35933 bool last_arg_constant = false;
35934 int num_memory = 0;
35935 struct {
35936 rtx op;
35937 machine_mode mode;
35938 } args[4];
35940 machine_mode tmode = insn_data[icode].operand[0].mode;
35942 switch (m_type)
35944 case MULTI_ARG_4_DF2_DI_I:
35945 case MULTI_ARG_4_DF2_DI_I1:
35946 case MULTI_ARG_4_SF2_SI_I:
35947 case MULTI_ARG_4_SF2_SI_I1:
35948 nargs = 4;
35949 last_arg_constant = true;
35950 break;
35952 case MULTI_ARG_3_SF:
35953 case MULTI_ARG_3_DF:
35954 case MULTI_ARG_3_SF2:
35955 case MULTI_ARG_3_DF2:
35956 case MULTI_ARG_3_DI:
35957 case MULTI_ARG_3_SI:
35958 case MULTI_ARG_3_SI_DI:
35959 case MULTI_ARG_3_HI:
35960 case MULTI_ARG_3_HI_SI:
35961 case MULTI_ARG_3_QI:
35962 case MULTI_ARG_3_DI2:
35963 case MULTI_ARG_3_SI2:
35964 case MULTI_ARG_3_HI2:
35965 case MULTI_ARG_3_QI2:
35966 nargs = 3;
35967 break;
35969 case MULTI_ARG_2_SF:
35970 case MULTI_ARG_2_DF:
35971 case MULTI_ARG_2_DI:
35972 case MULTI_ARG_2_SI:
35973 case MULTI_ARG_2_HI:
35974 case MULTI_ARG_2_QI:
35975 nargs = 2;
35976 break;
35978 case MULTI_ARG_2_DI_IMM:
35979 case MULTI_ARG_2_SI_IMM:
35980 case MULTI_ARG_2_HI_IMM:
35981 case MULTI_ARG_2_QI_IMM:
35982 nargs = 2;
35983 last_arg_constant = true;
35984 break;
35986 case MULTI_ARG_1_SF:
35987 case MULTI_ARG_1_DF:
35988 case MULTI_ARG_1_SF2:
35989 case MULTI_ARG_1_DF2:
35990 case MULTI_ARG_1_DI:
35991 case MULTI_ARG_1_SI:
35992 case MULTI_ARG_1_HI:
35993 case MULTI_ARG_1_QI:
35994 case MULTI_ARG_1_SI_DI:
35995 case MULTI_ARG_1_HI_DI:
35996 case MULTI_ARG_1_HI_SI:
35997 case MULTI_ARG_1_QI_DI:
35998 case MULTI_ARG_1_QI_SI:
35999 case MULTI_ARG_1_QI_HI:
36000 nargs = 1;
36001 break;
36003 case MULTI_ARG_2_DI_CMP:
36004 case MULTI_ARG_2_SI_CMP:
36005 case MULTI_ARG_2_HI_CMP:
36006 case MULTI_ARG_2_QI_CMP:
36007 nargs = 2;
36008 comparison_p = true;
36009 break;
36011 case MULTI_ARG_2_SF_TF:
36012 case MULTI_ARG_2_DF_TF:
36013 case MULTI_ARG_2_DI_TF:
36014 case MULTI_ARG_2_SI_TF:
36015 case MULTI_ARG_2_HI_TF:
36016 case MULTI_ARG_2_QI_TF:
36017 nargs = 2;
36018 tf_p = true;
36019 break;
36021 default:
36022 gcc_unreachable ();
36025 if (optimize || !target
36026 || GET_MODE (target) != tmode
36027 || !insn_data[icode].operand[0].predicate (target, tmode))
36028 target = gen_reg_rtx (tmode);
36030 gcc_assert (nargs <= 4);
36032 for (i = 0; i < nargs; i++)
36034 tree arg = CALL_EXPR_ARG (exp, i);
36035 rtx op = expand_normal (arg);
36036 int adjust = (comparison_p) ? 1 : 0;
36037 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36039 if (last_arg_constant && i == nargs - 1)
36041 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36043 enum insn_code new_icode = icode;
36044 switch (icode)
36046 case CODE_FOR_xop_vpermil2v2df3:
36047 case CODE_FOR_xop_vpermil2v4sf3:
36048 case CODE_FOR_xop_vpermil2v4df3:
36049 case CODE_FOR_xop_vpermil2v8sf3:
36050 error ("the last argument must be a 2-bit immediate");
36051 return gen_reg_rtx (tmode);
36052 case CODE_FOR_xop_rotlv2di3:
36053 new_icode = CODE_FOR_rotlv2di3;
36054 goto xop_rotl;
36055 case CODE_FOR_xop_rotlv4si3:
36056 new_icode = CODE_FOR_rotlv4si3;
36057 goto xop_rotl;
36058 case CODE_FOR_xop_rotlv8hi3:
36059 new_icode = CODE_FOR_rotlv8hi3;
36060 goto xop_rotl;
36061 case CODE_FOR_xop_rotlv16qi3:
36062 new_icode = CODE_FOR_rotlv16qi3;
36063 xop_rotl:
36064 if (CONST_INT_P (op))
36066 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36067 op = GEN_INT (INTVAL (op) & mask);
36068 gcc_checking_assert
36069 (insn_data[icode].operand[i + 1].predicate (op, mode));
36071 else
36073 gcc_checking_assert
36074 (nargs == 2
36075 && insn_data[new_icode].operand[0].mode == tmode
36076 && insn_data[new_icode].operand[1].mode == tmode
36077 && insn_data[new_icode].operand[2].mode == mode
36078 && insn_data[new_icode].operand[0].predicate
36079 == insn_data[icode].operand[0].predicate
36080 && insn_data[new_icode].operand[1].predicate
36081 == insn_data[icode].operand[1].predicate);
36082 icode = new_icode;
36083 goto non_constant;
36085 break;
36086 default:
36087 gcc_unreachable ();
36091 else
36093 non_constant:
36094 if (VECTOR_MODE_P (mode))
36095 op = safe_vector_operand (op, mode);
36097 /* If we aren't optimizing, only allow one memory operand to be
36098 generated. */
36099 if (memory_operand (op, mode))
36100 num_memory++;
36102 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36104 if (optimize
36105 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36106 || num_memory > 1)
36107 op = force_reg (mode, op);
36110 args[i].op = op;
36111 args[i].mode = mode;
36114 switch (nargs)
36116 case 1:
36117 pat = GEN_FCN (icode) (target, args[0].op);
36118 break;
36120 case 2:
36121 if (tf_p)
36122 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36123 GEN_INT ((int)sub_code));
36124 else if (! comparison_p)
36125 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36126 else
36128 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36129 args[0].op,
36130 args[1].op);
36132 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36134 break;
36136 case 3:
36137 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36138 break;
36140 case 4:
36141 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36142 break;
36144 default:
36145 gcc_unreachable ();
36148 if (! pat)
36149 return 0;
36151 emit_insn (pat);
36152 return target;
36155 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36156 insns with vec_merge. */
36158 static rtx
36159 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36160 rtx target)
36162 rtx pat;
36163 tree arg0 = CALL_EXPR_ARG (exp, 0);
36164 rtx op1, op0 = expand_normal (arg0);
36165 machine_mode tmode = insn_data[icode].operand[0].mode;
36166 machine_mode mode0 = insn_data[icode].operand[1].mode;
36168 if (optimize || !target
36169 || GET_MODE (target) != tmode
36170 || !insn_data[icode].operand[0].predicate (target, tmode))
36171 target = gen_reg_rtx (tmode);
36173 if (VECTOR_MODE_P (mode0))
36174 op0 = safe_vector_operand (op0, mode0);
36176 if ((optimize && !register_operand (op0, mode0))
36177 || !insn_data[icode].operand[1].predicate (op0, mode0))
36178 op0 = copy_to_mode_reg (mode0, op0);
36180 op1 = op0;
36181 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36182 op1 = copy_to_mode_reg (mode0, op1);
36184 pat = GEN_FCN (icode) (target, op0, op1);
36185 if (! pat)
36186 return 0;
36187 emit_insn (pat);
36188 return target;
36191 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36193 static rtx
36194 ix86_expand_sse_compare (const struct builtin_description *d,
36195 tree exp, rtx target, bool swap)
36197 rtx pat;
36198 tree arg0 = CALL_EXPR_ARG (exp, 0);
36199 tree arg1 = CALL_EXPR_ARG (exp, 1);
36200 rtx op0 = expand_normal (arg0);
36201 rtx op1 = expand_normal (arg1);
36202 rtx op2;
36203 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36204 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36205 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36206 enum rtx_code comparison = d->comparison;
36208 if (VECTOR_MODE_P (mode0))
36209 op0 = safe_vector_operand (op0, mode0);
36210 if (VECTOR_MODE_P (mode1))
36211 op1 = safe_vector_operand (op1, mode1);
36213 /* Swap operands if we have a comparison that isn't available in
36214 hardware. */
36215 if (swap)
36216 std::swap (op0, op1);
36218 if (optimize || !target
36219 || GET_MODE (target) != tmode
36220 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36221 target = gen_reg_rtx (tmode);
36223 if ((optimize && !register_operand (op0, mode0))
36224 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36225 op0 = copy_to_mode_reg (mode0, op0);
36226 if ((optimize && !register_operand (op1, mode1))
36227 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36228 op1 = copy_to_mode_reg (mode1, op1);
36230 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36231 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36232 if (! pat)
36233 return 0;
36234 emit_insn (pat);
36235 return target;
36238 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36240 static rtx
36241 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36242 rtx target)
36244 rtx pat;
36245 tree arg0 = CALL_EXPR_ARG (exp, 0);
36246 tree arg1 = CALL_EXPR_ARG (exp, 1);
36247 rtx op0 = expand_normal (arg0);
36248 rtx op1 = expand_normal (arg1);
36249 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36250 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36251 enum rtx_code comparison = d->comparison;
36253 if (VECTOR_MODE_P (mode0))
36254 op0 = safe_vector_operand (op0, mode0);
36255 if (VECTOR_MODE_P (mode1))
36256 op1 = safe_vector_operand (op1, mode1);
36258 /* Swap operands if we have a comparison that isn't available in
36259 hardware. */
36260 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36261 std::swap (op0, op1);
36263 target = gen_reg_rtx (SImode);
36264 emit_move_insn (target, const0_rtx);
36265 target = gen_rtx_SUBREG (QImode, target, 0);
36267 if ((optimize && !register_operand (op0, mode0))
36268 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36269 op0 = copy_to_mode_reg (mode0, op0);
36270 if ((optimize && !register_operand (op1, mode1))
36271 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36272 op1 = copy_to_mode_reg (mode1, op1);
36274 pat = GEN_FCN (d->icode) (op0, op1);
36275 if (! pat)
36276 return 0;
36277 emit_insn (pat);
36278 emit_insn (gen_rtx_SET (VOIDmode,
36279 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36280 gen_rtx_fmt_ee (comparison, QImode,
36281 SET_DEST (pat),
36282 const0_rtx)));
36284 return SUBREG_REG (target);
36287 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36289 static rtx
36290 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36291 rtx target)
36293 rtx pat;
36294 tree arg0 = CALL_EXPR_ARG (exp, 0);
36295 rtx op1, op0 = expand_normal (arg0);
36296 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36297 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36299 if (optimize || target == 0
36300 || GET_MODE (target) != tmode
36301 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36302 target = gen_reg_rtx (tmode);
36304 if (VECTOR_MODE_P (mode0))
36305 op0 = safe_vector_operand (op0, mode0);
36307 if ((optimize && !register_operand (op0, mode0))
36308 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36309 op0 = copy_to_mode_reg (mode0, op0);
36311 op1 = GEN_INT (d->comparison);
36313 pat = GEN_FCN (d->icode) (target, op0, op1);
36314 if (! pat)
36315 return 0;
36316 emit_insn (pat);
36317 return target;
36320 static rtx
36321 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36322 tree exp, rtx target)
36324 rtx pat;
36325 tree arg0 = CALL_EXPR_ARG (exp, 0);
36326 tree arg1 = CALL_EXPR_ARG (exp, 1);
36327 rtx op0 = expand_normal (arg0);
36328 rtx op1 = expand_normal (arg1);
36329 rtx op2;
36330 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36331 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36332 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36334 if (optimize || target == 0
36335 || GET_MODE (target) != tmode
36336 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36337 target = gen_reg_rtx (tmode);
36339 op0 = safe_vector_operand (op0, mode0);
36340 op1 = safe_vector_operand (op1, mode1);
36342 if ((optimize && !register_operand (op0, mode0))
36343 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36344 op0 = copy_to_mode_reg (mode0, op0);
36345 if ((optimize && !register_operand (op1, mode1))
36346 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36347 op1 = copy_to_mode_reg (mode1, op1);
36349 op2 = GEN_INT (d->comparison);
36351 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36352 if (! pat)
36353 return 0;
36354 emit_insn (pat);
36355 return target;
36358 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36360 static rtx
36361 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36362 rtx target)
36364 rtx pat;
36365 tree arg0 = CALL_EXPR_ARG (exp, 0);
36366 tree arg1 = CALL_EXPR_ARG (exp, 1);
36367 rtx op0 = expand_normal (arg0);
36368 rtx op1 = expand_normal (arg1);
36369 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36370 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36371 enum rtx_code comparison = d->comparison;
36373 if (VECTOR_MODE_P (mode0))
36374 op0 = safe_vector_operand (op0, mode0);
36375 if (VECTOR_MODE_P (mode1))
36376 op1 = safe_vector_operand (op1, mode1);
36378 target = gen_reg_rtx (SImode);
36379 emit_move_insn (target, const0_rtx);
36380 target = gen_rtx_SUBREG (QImode, target, 0);
36382 if ((optimize && !register_operand (op0, mode0))
36383 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36384 op0 = copy_to_mode_reg (mode0, op0);
36385 if ((optimize && !register_operand (op1, mode1))
36386 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36387 op1 = copy_to_mode_reg (mode1, op1);
36389 pat = GEN_FCN (d->icode) (op0, op1);
36390 if (! pat)
36391 return 0;
36392 emit_insn (pat);
36393 emit_insn (gen_rtx_SET (VOIDmode,
36394 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36395 gen_rtx_fmt_ee (comparison, QImode,
36396 SET_DEST (pat),
36397 const0_rtx)));
36399 return SUBREG_REG (target);
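/* The ptest expander above differs from a normal two-operand expander in
   that the generated pattern only sets the flags (its SET_DEST); the int
   return value is obtained by applying d->comparison to that CC result
   with the same STRICT_LOW_PART sequence described earlier.  */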
36402 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36404 static rtx
36405 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36406 tree exp, rtx target)
36408 rtx pat;
36409 tree arg0 = CALL_EXPR_ARG (exp, 0);
36410 tree arg1 = CALL_EXPR_ARG (exp, 1);
36411 tree arg2 = CALL_EXPR_ARG (exp, 2);
36412 tree arg3 = CALL_EXPR_ARG (exp, 3);
36413 tree arg4 = CALL_EXPR_ARG (exp, 4);
36414 rtx scratch0, scratch1;
36415 rtx op0 = expand_normal (arg0);
36416 rtx op1 = expand_normal (arg1);
36417 rtx op2 = expand_normal (arg2);
36418 rtx op3 = expand_normal (arg3);
36419 rtx op4 = expand_normal (arg4);
36420 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36422 tmode0 = insn_data[d->icode].operand[0].mode;
36423 tmode1 = insn_data[d->icode].operand[1].mode;
36424 modev2 = insn_data[d->icode].operand[2].mode;
36425 modei3 = insn_data[d->icode].operand[3].mode;
36426 modev4 = insn_data[d->icode].operand[4].mode;
36427 modei5 = insn_data[d->icode].operand[5].mode;
36428 modeimm = insn_data[d->icode].operand[6].mode;
36430 if (VECTOR_MODE_P (modev2))
36431 op0 = safe_vector_operand (op0, modev2);
36432 if (VECTOR_MODE_P (modev4))
36433 op2 = safe_vector_operand (op2, modev4);
36435 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36436 op0 = copy_to_mode_reg (modev2, op0);
36437 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36438 op1 = copy_to_mode_reg (modei3, op1);
36439 if ((optimize && !register_operand (op2, modev4))
36440 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36441 op2 = copy_to_mode_reg (modev4, op2);
36442 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36443 op3 = copy_to_mode_reg (modei5, op3);
36445 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36447 error ("the fifth argument must be an 8-bit immediate");
36448 return const0_rtx;
36451 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36453 if (optimize || !target
36454 || GET_MODE (target) != tmode0
36455 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36456 target = gen_reg_rtx (tmode0);
36458 scratch1 = gen_reg_rtx (tmode1);
36460 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36462 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36464 if (optimize || !target
36465 || GET_MODE (target) != tmode1
36466 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36467 target = gen_reg_rtx (tmode1);
36469 scratch0 = gen_reg_rtx (tmode0);
36471 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36473 else
36475 gcc_assert (d->flag);
36477 scratch0 = gen_reg_rtx (tmode0);
36478 scratch1 = gen_reg_rtx (tmode1);
36480 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36483 if (! pat)
36484 return 0;
36486 emit_insn (pat);
36488 if (d->flag)
36490 target = gen_reg_rtx (SImode);
36491 emit_move_insn (target, const0_rtx);
36492 target = gen_rtx_SUBREG (QImode, target, 0);
36494 emit_insn
36495 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36496 gen_rtx_fmt_ee (EQ, QImode,
36497 gen_rtx_REG ((machine_mode) d->flag,
36498 FLAGS_REG),
36499 const0_rtx)));
36500 return SUBREG_REG (target);
36502 else
36503 return target;
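/* Reading of the pcmpestr expansion above (descriptive only): the five
   builtin arguments are vector, length, vector, length and an 8-bit
   control immediate.  IX86_BUILTIN_PCMPESTRI128 keeps the index result
   (tmode0) and discards the mask in scratch1, IX86_BUILTIN_PCMPESTRM128
   keeps the mask (tmode1) and discards the index, and every other
   descriptor has d->flag set and returns one flag bit instead, read as
   (EQ (reg FLAGS_REG) 0) in the CC mode encoded by d->flag.  */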
36507 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36509 static rtx
36510 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36511 tree exp, rtx target)
36513 rtx pat;
36514 tree arg0 = CALL_EXPR_ARG (exp, 0);
36515 tree arg1 = CALL_EXPR_ARG (exp, 1);
36516 tree arg2 = CALL_EXPR_ARG (exp, 2);
36517 rtx scratch0, scratch1;
36518 rtx op0 = expand_normal (arg0);
36519 rtx op1 = expand_normal (arg1);
36520 rtx op2 = expand_normal (arg2);
36521 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36523 tmode0 = insn_data[d->icode].operand[0].mode;
36524 tmode1 = insn_data[d->icode].operand[1].mode;
36525 modev2 = insn_data[d->icode].operand[2].mode;
36526 modev3 = insn_data[d->icode].operand[3].mode;
36527 modeimm = insn_data[d->icode].operand[4].mode;
36529 if (VECTOR_MODE_P (modev2))
36530 op0 = safe_vector_operand (op0, modev2);
36531 if (VECTOR_MODE_P (modev3))
36532 op1 = safe_vector_operand (op1, modev3);
36534 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36535 op0 = copy_to_mode_reg (modev2, op0);
36536 if ((optimize && !register_operand (op1, modev3))
36537 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36538 op1 = copy_to_mode_reg (modev3, op1);
36540 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36542 error ("the third argument must be an 8-bit immediate");
36543 return const0_rtx;
36546 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36548 if (optimize || !target
36549 || GET_MODE (target) != tmode0
36550 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36551 target = gen_reg_rtx (tmode0);
36553 scratch1 = gen_reg_rtx (tmode1);
36555 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36557 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36559 if (optimize || !target
36560 || GET_MODE (target) != tmode1
36561 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36562 target = gen_reg_rtx (tmode1);
36564 scratch0 = gen_reg_rtx (tmode0);
36566 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36568 else
36570 gcc_assert (d->flag);
36572 scratch0 = gen_reg_rtx (tmode0);
36573 scratch1 = gen_reg_rtx (tmode1);
36575 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36578 if (! pat)
36579 return 0;
36581 emit_insn (pat);
36583 if (d->flag)
36585 target = gen_reg_rtx (SImode);
36586 emit_move_insn (target, const0_rtx);
36587 target = gen_rtx_SUBREG (QImode, target, 0);
36589 emit_insn
36590 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36591 gen_rtx_fmt_ee (EQ, QImode,
36592 gen_rtx_REG ((machine_mode) d->flag,
36593 FLAGS_REG),
36594 const0_rtx)));
36595 return SUBREG_REG (target);
36597 else
36598 return target;
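/* The pcmpistr expander mirrors the explicit-length variant above, minus
   the two length operands: three arguments (vector, vector, imm8) and
   the same choice between index, mask and flag-bit results.  */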
36601 /* Subroutine of ix86_expand_builtin to take care of insns with
36602 variable number of operands. */
36604 static rtx
36605 ix86_expand_args_builtin (const struct builtin_description *d,
36606 tree exp, rtx target)
36608 rtx pat, real_target;
36609 unsigned int i, nargs;
36610 unsigned int nargs_constant = 0;
36611 unsigned int mask_pos = 0;
36612 int num_memory = 0;
36613 struct
36615 rtx op;
36616 machine_mode mode;
36617 } args[6];
36618 bool last_arg_count = false;
36619 enum insn_code icode = d->icode;
36620 const struct insn_data_d *insn_p = &insn_data[icode];
36621 machine_mode tmode = insn_p->operand[0].mode;
36622 machine_mode rmode = VOIDmode;
36623 bool swap = false;
36624 enum rtx_code comparison = d->comparison;
36626 switch ((enum ix86_builtin_func_type) d->flag)
36628 case V2DF_FTYPE_V2DF_ROUND:
36629 case V4DF_FTYPE_V4DF_ROUND:
36630 case V4SF_FTYPE_V4SF_ROUND:
36631 case V8SF_FTYPE_V8SF_ROUND:
36632 case V4SI_FTYPE_V4SF_ROUND:
36633 case V8SI_FTYPE_V8SF_ROUND:
36634 return ix86_expand_sse_round (d, exp, target);
36635 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36636 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36637 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36638 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36639 case INT_FTYPE_V8SF_V8SF_PTEST:
36640 case INT_FTYPE_V4DI_V4DI_PTEST:
36641 case INT_FTYPE_V4DF_V4DF_PTEST:
36642 case INT_FTYPE_V4SF_V4SF_PTEST:
36643 case INT_FTYPE_V2DI_V2DI_PTEST:
36644 case INT_FTYPE_V2DF_V2DF_PTEST:
36645 return ix86_expand_sse_ptest (d, exp, target);
36646 case FLOAT128_FTYPE_FLOAT128:
36647 case FLOAT_FTYPE_FLOAT:
36648 case INT_FTYPE_INT:
36649 case UINT64_FTYPE_INT:
36650 case UINT16_FTYPE_UINT16:
36651 case INT64_FTYPE_INT64:
36652 case INT64_FTYPE_V4SF:
36653 case INT64_FTYPE_V2DF:
36654 case INT_FTYPE_V16QI:
36655 case INT_FTYPE_V8QI:
36656 case INT_FTYPE_V8SF:
36657 case INT_FTYPE_V4DF:
36658 case INT_FTYPE_V4SF:
36659 case INT_FTYPE_V2DF:
36660 case INT_FTYPE_V32QI:
36661 case V16QI_FTYPE_V16QI:
36662 case V8SI_FTYPE_V8SF:
36663 case V8SI_FTYPE_V4SI:
36664 case V8HI_FTYPE_V8HI:
36665 case V8HI_FTYPE_V16QI:
36666 case V8QI_FTYPE_V8QI:
36667 case V8SF_FTYPE_V8SF:
36668 case V8SF_FTYPE_V8SI:
36669 case V8SF_FTYPE_V4SF:
36670 case V8SF_FTYPE_V8HI:
36671 case V4SI_FTYPE_V4SI:
36672 case V4SI_FTYPE_V16QI:
36673 case V4SI_FTYPE_V4SF:
36674 case V4SI_FTYPE_V8SI:
36675 case V4SI_FTYPE_V8HI:
36676 case V4SI_FTYPE_V4DF:
36677 case V4SI_FTYPE_V2DF:
36678 case V4HI_FTYPE_V4HI:
36679 case V4DF_FTYPE_V4DF:
36680 case V4DF_FTYPE_V4SI:
36681 case V4DF_FTYPE_V4SF:
36682 case V4DF_FTYPE_V2DF:
36683 case V4SF_FTYPE_V4SF:
36684 case V4SF_FTYPE_V4SI:
36685 case V4SF_FTYPE_V8SF:
36686 case V4SF_FTYPE_V4DF:
36687 case V4SF_FTYPE_V8HI:
36688 case V4SF_FTYPE_V2DF:
36689 case V2DI_FTYPE_V2DI:
36690 case V2DI_FTYPE_V16QI:
36691 case V2DI_FTYPE_V8HI:
36692 case V2DI_FTYPE_V4SI:
36693 case V2DF_FTYPE_V2DF:
36694 case V2DF_FTYPE_V4SI:
36695 case V2DF_FTYPE_V4DF:
36696 case V2DF_FTYPE_V4SF:
36697 case V2DF_FTYPE_V2SI:
36698 case V2SI_FTYPE_V2SI:
36699 case V2SI_FTYPE_V4SF:
36700 case V2SI_FTYPE_V2SF:
36701 case V2SI_FTYPE_V2DF:
36702 case V2SF_FTYPE_V2SF:
36703 case V2SF_FTYPE_V2SI:
36704 case V32QI_FTYPE_V32QI:
36705 case V32QI_FTYPE_V16QI:
36706 case V16HI_FTYPE_V16HI:
36707 case V16HI_FTYPE_V8HI:
36708 case V8SI_FTYPE_V8SI:
36709 case V16HI_FTYPE_V16QI:
36710 case V8SI_FTYPE_V16QI:
36711 case V4DI_FTYPE_V16QI:
36712 case V8SI_FTYPE_V8HI:
36713 case V4DI_FTYPE_V8HI:
36714 case V4DI_FTYPE_V4SI:
36715 case V4DI_FTYPE_V2DI:
36716 case HI_FTYPE_HI:
36717 case HI_FTYPE_V16QI:
36718 case SI_FTYPE_V32QI:
36719 case DI_FTYPE_V64QI:
36720 case V16QI_FTYPE_HI:
36721 case V32QI_FTYPE_SI:
36722 case V64QI_FTYPE_DI:
36723 case V8HI_FTYPE_QI:
36724 case V16HI_FTYPE_HI:
36725 case V32HI_FTYPE_SI:
36726 case V4SI_FTYPE_QI:
36727 case V8SI_FTYPE_QI:
36728 case V4SI_FTYPE_HI:
36729 case V8SI_FTYPE_HI:
36730 case QI_FTYPE_V8HI:
36731 case HI_FTYPE_V16HI:
36732 case SI_FTYPE_V32HI:
36733 case QI_FTYPE_V4SI:
36734 case QI_FTYPE_V8SI:
36735 case HI_FTYPE_V16SI:
36736 case QI_FTYPE_V2DI:
36737 case QI_FTYPE_V4DI:
36738 case QI_FTYPE_V8DI:
36739 case UINT_FTYPE_V2DF:
36740 case UINT_FTYPE_V4SF:
36741 case UINT64_FTYPE_V2DF:
36742 case UINT64_FTYPE_V4SF:
36743 case V16QI_FTYPE_V8DI:
36744 case V16HI_FTYPE_V16SI:
36745 case V16SI_FTYPE_HI:
36746 case V2DI_FTYPE_QI:
36747 case V4DI_FTYPE_QI:
36748 case V16SI_FTYPE_V16SI:
36749 case V16SI_FTYPE_INT:
36750 case V16SF_FTYPE_FLOAT:
36751 case V16SF_FTYPE_V8SF:
36752 case V16SI_FTYPE_V8SI:
36753 case V16SF_FTYPE_V4SF:
36754 case V16SI_FTYPE_V4SI:
36755 case V16SF_FTYPE_V16SF:
36756 case V8HI_FTYPE_V8DI:
36757 case V8UHI_FTYPE_V8UHI:
36758 case V8SI_FTYPE_V8DI:
36759 case V8SF_FTYPE_V8DF:
36760 case V8DI_FTYPE_QI:
36761 case V8DI_FTYPE_INT64:
36762 case V8DI_FTYPE_V4DI:
36763 case V8DI_FTYPE_V8DI:
36764 case V8DF_FTYPE_DOUBLE:
36765 case V8DF_FTYPE_V4DF:
36766 case V8DF_FTYPE_V2DF:
36767 case V8DF_FTYPE_V8DF:
36768 case V8DF_FTYPE_V8SI:
36769 nargs = 1;
36770 break;
36771 case V4SF_FTYPE_V4SF_VEC_MERGE:
36772 case V2DF_FTYPE_V2DF_VEC_MERGE:
36773 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36774 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36775 case V16QI_FTYPE_V16QI_V16QI:
36776 case V16QI_FTYPE_V8HI_V8HI:
36777 case V16SI_FTYPE_V16SI_V16SI:
36778 case V16SF_FTYPE_V16SF_V16SF:
36779 case V16SF_FTYPE_V16SF_V16SI:
36780 case V8QI_FTYPE_V8QI_V8QI:
36781 case V8QI_FTYPE_V4HI_V4HI:
36782 case V8HI_FTYPE_V8HI_V8HI:
36783 case V8HI_FTYPE_V16QI_V16QI:
36784 case V8HI_FTYPE_V4SI_V4SI:
36785 case V8SF_FTYPE_V8SF_V8SF:
36786 case V8SF_FTYPE_V8SF_V8SI:
36787 case V8DI_FTYPE_V8DI_V8DI:
36788 case V8DF_FTYPE_V8DF_V8DF:
36789 case V8DF_FTYPE_V8DF_V8DI:
36790 case V4SI_FTYPE_V4SI_V4SI:
36791 case V4SI_FTYPE_V8HI_V8HI:
36792 case V4SI_FTYPE_V4SF_V4SF:
36793 case V4SI_FTYPE_V2DF_V2DF:
36794 case V4HI_FTYPE_V4HI_V4HI:
36795 case V4HI_FTYPE_V8QI_V8QI:
36796 case V4HI_FTYPE_V2SI_V2SI:
36797 case V4DF_FTYPE_V4DF_V4DF:
36798 case V4DF_FTYPE_V4DF_V4DI:
36799 case V4SF_FTYPE_V4SF_V4SF:
36800 case V4SF_FTYPE_V4SF_V4SI:
36801 case V4SF_FTYPE_V4SF_V2SI:
36802 case V4SF_FTYPE_V4SF_V2DF:
36803 case V4SF_FTYPE_V4SF_UINT:
36804 case V4SF_FTYPE_V4SF_UINT64:
36805 case V4SF_FTYPE_V4SF_DI:
36806 case V4SF_FTYPE_V4SF_SI:
36807 case V2DI_FTYPE_V2DI_V2DI:
36808 case V2DI_FTYPE_V16QI_V16QI:
36809 case V2DI_FTYPE_V4SI_V4SI:
36810 case V2UDI_FTYPE_V4USI_V4USI:
36811 case V2DI_FTYPE_V2DI_V16QI:
36812 case V2DI_FTYPE_V2DF_V2DF:
36813 case V2SI_FTYPE_V2SI_V2SI:
36814 case V2SI_FTYPE_V4HI_V4HI:
36815 case V2SI_FTYPE_V2SF_V2SF:
36816 case V2DF_FTYPE_V2DF_V2DF:
36817 case V2DF_FTYPE_V2DF_V4SF:
36818 case V2DF_FTYPE_V2DF_V2DI:
36819 case V2DF_FTYPE_V2DF_DI:
36820 case V2DF_FTYPE_V2DF_SI:
36821 case V2DF_FTYPE_V2DF_UINT:
36822 case V2DF_FTYPE_V2DF_UINT64:
36823 case V2SF_FTYPE_V2SF_V2SF:
36824 case V1DI_FTYPE_V1DI_V1DI:
36825 case V1DI_FTYPE_V8QI_V8QI:
36826 case V1DI_FTYPE_V2SI_V2SI:
36827 case V32QI_FTYPE_V16HI_V16HI:
36828 case V16HI_FTYPE_V8SI_V8SI:
36829 case V32QI_FTYPE_V32QI_V32QI:
36830 case V16HI_FTYPE_V32QI_V32QI:
36831 case V16HI_FTYPE_V16HI_V16HI:
36832 case V8SI_FTYPE_V4DF_V4DF:
36833 case V8SI_FTYPE_V8SI_V8SI:
36834 case V8SI_FTYPE_V16HI_V16HI:
36835 case V4DI_FTYPE_V4DI_V4DI:
36836 case V4DI_FTYPE_V8SI_V8SI:
36837 case V4UDI_FTYPE_V8USI_V8USI:
36838 case QI_FTYPE_V8DI_V8DI:
36839 case V8DI_FTYPE_V64QI_V64QI:
36840 case HI_FTYPE_V16SI_V16SI:
36841 if (comparison == UNKNOWN)
36842 return ix86_expand_binop_builtin (icode, exp, target);
36843 nargs = 2;
36844 break;
36845 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36846 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36847 gcc_assert (comparison != UNKNOWN);
36848 nargs = 2;
36849 swap = true;
36850 break;
36851 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36852 case V16HI_FTYPE_V16HI_SI_COUNT:
36853 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36854 case V8SI_FTYPE_V8SI_SI_COUNT:
36855 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36856 case V4DI_FTYPE_V4DI_INT_COUNT:
36857 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36858 case V8HI_FTYPE_V8HI_SI_COUNT:
36859 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36860 case V4SI_FTYPE_V4SI_SI_COUNT:
36861 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36862 case V4HI_FTYPE_V4HI_SI_COUNT:
36863 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36864 case V2DI_FTYPE_V2DI_SI_COUNT:
36865 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36866 case V2SI_FTYPE_V2SI_SI_COUNT:
36867 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36868 case V1DI_FTYPE_V1DI_SI_COUNT:
36869 nargs = 2;
36870 last_arg_count = true;
36871 break;
36872 case UINT64_FTYPE_UINT64_UINT64:
36873 case UINT_FTYPE_UINT_UINT:
36874 case UINT_FTYPE_UINT_USHORT:
36875 case UINT_FTYPE_UINT_UCHAR:
36876 case UINT16_FTYPE_UINT16_INT:
36877 case UINT8_FTYPE_UINT8_INT:
36878 case HI_FTYPE_HI_HI:
36879 case SI_FTYPE_SI_SI:
36880 case DI_FTYPE_DI_DI:
36881 case V16SI_FTYPE_V8DF_V8DF:
36882 nargs = 2;
36883 break;
36884 case V2DI_FTYPE_V2DI_INT_CONVERT:
36885 nargs = 2;
36886 rmode = V1TImode;
36887 nargs_constant = 1;
36888 break;
36889 case V4DI_FTYPE_V4DI_INT_CONVERT:
36890 nargs = 2;
36891 rmode = V2TImode;
36892 nargs_constant = 1;
36893 break;
36894 case V8DI_FTYPE_V8DI_INT_CONVERT:
36895 nargs = 2;
36896 rmode = V4TImode;
36897 nargs_constant = 1;
36898 break;
36899 case V8HI_FTYPE_V8HI_INT:
36900 case V8HI_FTYPE_V8SF_INT:
36901 case V16HI_FTYPE_V16SF_INT:
36902 case V8HI_FTYPE_V4SF_INT:
36903 case V8SF_FTYPE_V8SF_INT:
36904 case V4SF_FTYPE_V16SF_INT:
36905 case V16SF_FTYPE_V16SF_INT:
36906 case V4SI_FTYPE_V4SI_INT:
36907 case V4SI_FTYPE_V8SI_INT:
36908 case V4HI_FTYPE_V4HI_INT:
36909 case V4DF_FTYPE_V4DF_INT:
36910 case V4DF_FTYPE_V8DF_INT:
36911 case V4SF_FTYPE_V4SF_INT:
36912 case V4SF_FTYPE_V8SF_INT:
36913 case V2DI_FTYPE_V2DI_INT:
36914 case V2DF_FTYPE_V2DF_INT:
36915 case V2DF_FTYPE_V4DF_INT:
36916 case V16HI_FTYPE_V16HI_INT:
36917 case V8SI_FTYPE_V8SI_INT:
36918 case V16SI_FTYPE_V16SI_INT:
36919 case V4SI_FTYPE_V16SI_INT:
36920 case V4DI_FTYPE_V4DI_INT:
36921 case V2DI_FTYPE_V4DI_INT:
36922 case V4DI_FTYPE_V8DI_INT:
36923 case HI_FTYPE_HI_INT:
36924 case QI_FTYPE_V4SF_INT:
36925 case QI_FTYPE_V2DF_INT:
36926 nargs = 2;
36927 nargs_constant = 1;
36928 break;
36929 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36930 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36931 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36932 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36933 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36934 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36935 case HI_FTYPE_V16SI_V16SI_HI:
36936 case QI_FTYPE_V8DI_V8DI_QI:
36937 case V16HI_FTYPE_V16SI_V16HI_HI:
36938 case V16QI_FTYPE_V16SI_V16QI_HI:
36939 case V16QI_FTYPE_V8DI_V16QI_QI:
36940 case V16SF_FTYPE_V16SF_V16SF_HI:
36941 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36942 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36943 case V16SF_FTYPE_V16SI_V16SF_HI:
36944 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36945 case V16SF_FTYPE_V4SF_V16SF_HI:
36946 case V16SI_FTYPE_SI_V16SI_HI:
36947 case V16SI_FTYPE_V16HI_V16SI_HI:
36948 case V16SI_FTYPE_V16QI_V16SI_HI:
36949 case V16SI_FTYPE_V16SF_V16SI_HI:
36950 case V8SF_FTYPE_V4SF_V8SF_QI:
36951 case V4DF_FTYPE_V2DF_V4DF_QI:
36952 case V8SI_FTYPE_V4SI_V8SI_QI:
36953 case V8SI_FTYPE_SI_V8SI_QI:
36954 case V4SI_FTYPE_V4SI_V4SI_QI:
36955 case V4SI_FTYPE_SI_V4SI_QI:
36956 case V4DI_FTYPE_V2DI_V4DI_QI:
36957 case V4DI_FTYPE_DI_V4DI_QI:
36958 case V2DI_FTYPE_V2DI_V2DI_QI:
36959 case V2DI_FTYPE_DI_V2DI_QI:
36960 case V64QI_FTYPE_V64QI_V64QI_DI:
36961 case V64QI_FTYPE_V16QI_V64QI_DI:
36962 case V64QI_FTYPE_QI_V64QI_DI:
36963 case V32QI_FTYPE_V32QI_V32QI_SI:
36964 case V32QI_FTYPE_V16QI_V32QI_SI:
36965 case V32QI_FTYPE_QI_V32QI_SI:
36966 case V16QI_FTYPE_V16QI_V16QI_HI:
36967 case V16QI_FTYPE_QI_V16QI_HI:
36968 case V32HI_FTYPE_V8HI_V32HI_SI:
36969 case V32HI_FTYPE_HI_V32HI_SI:
36970 case V16HI_FTYPE_V8HI_V16HI_HI:
36971 case V16HI_FTYPE_HI_V16HI_HI:
36972 case V8HI_FTYPE_V8HI_V8HI_QI:
36973 case V8HI_FTYPE_HI_V8HI_QI:
36974 case V8SF_FTYPE_V8HI_V8SF_QI:
36975 case V4SF_FTYPE_V8HI_V4SF_QI:
36976 case V8SI_FTYPE_V8SF_V8SI_QI:
36977 case V4SI_FTYPE_V4SF_V4SI_QI:
36978 case V8DI_FTYPE_V8SF_V8DI_QI:
36979 case V4DI_FTYPE_V4SF_V4DI_QI:
36980 case V2DI_FTYPE_V4SF_V2DI_QI:
36981 case V8SF_FTYPE_V8DI_V8SF_QI:
36982 case V4SF_FTYPE_V4DI_V4SF_QI:
36983 case V4SF_FTYPE_V2DI_V4SF_QI:
36984 case V8DF_FTYPE_V8DI_V8DF_QI:
36985 case V4DF_FTYPE_V4DI_V4DF_QI:
36986 case V2DF_FTYPE_V2DI_V2DF_QI:
36987 case V16QI_FTYPE_V8HI_V16QI_QI:
36988 case V16QI_FTYPE_V16HI_V16QI_HI:
36989 case V16QI_FTYPE_V4SI_V16QI_QI:
36990 case V16QI_FTYPE_V8SI_V16QI_QI:
36991 case V8HI_FTYPE_V4SI_V8HI_QI:
36992 case V8HI_FTYPE_V8SI_V8HI_QI:
36993 case V16QI_FTYPE_V2DI_V16QI_QI:
36994 case V16QI_FTYPE_V4DI_V16QI_QI:
36995 case V8HI_FTYPE_V2DI_V8HI_QI:
36996 case V8HI_FTYPE_V4DI_V8HI_QI:
36997 case V4SI_FTYPE_V2DI_V4SI_QI:
36998 case V4SI_FTYPE_V4DI_V4SI_QI:
36999 case V32QI_FTYPE_V32HI_V32QI_SI:
37000 case HI_FTYPE_V16QI_V16QI_HI:
37001 case SI_FTYPE_V32QI_V32QI_SI:
37002 case DI_FTYPE_V64QI_V64QI_DI:
37003 case QI_FTYPE_V8HI_V8HI_QI:
37004 case HI_FTYPE_V16HI_V16HI_HI:
37005 case SI_FTYPE_V32HI_V32HI_SI:
37006 case QI_FTYPE_V4SI_V4SI_QI:
37007 case QI_FTYPE_V8SI_V8SI_QI:
37008 case QI_FTYPE_V2DI_V2DI_QI:
37009 case QI_FTYPE_V4DI_V4DI_QI:
37010 case V4SF_FTYPE_V2DF_V4SF_QI:
37011 case V4SF_FTYPE_V4DF_V4SF_QI:
37012 case V16SI_FTYPE_V16SI_V16SI_HI:
37013 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37014 case V16SI_FTYPE_V4SI_V16SI_HI:
37015 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37016 case V2DI_FTYPE_V4SI_V2DI_QI:
37017 case V2DI_FTYPE_V8HI_V2DI_QI:
37018 case V2DI_FTYPE_V16QI_V2DI_QI:
37019 case V4DI_FTYPE_V4DI_V4DI_QI:
37020 case V4DI_FTYPE_V4SI_V4DI_QI:
37021 case V4DI_FTYPE_V8HI_V4DI_QI:
37022 case V4DI_FTYPE_V16QI_V4DI_QI:
37023 case V8DI_FTYPE_V8DF_V8DI_QI:
37024 case V4DI_FTYPE_V4DF_V4DI_QI:
37025 case V2DI_FTYPE_V2DF_V2DI_QI:
37026 case V4SI_FTYPE_V4DF_V4SI_QI:
37027 case V4SI_FTYPE_V2DF_V4SI_QI:
37028 case V4SI_FTYPE_V8HI_V4SI_QI:
37029 case V4SI_FTYPE_V16QI_V4SI_QI:
37030 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37031 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37032 case V8DF_FTYPE_V2DF_V8DF_QI:
37033 case V8DF_FTYPE_V4DF_V8DF_QI:
37034 case V8DF_FTYPE_V8DF_V8DF_QI:
37035 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37036 case V8SF_FTYPE_V8SF_V8SF_QI:
37037 case V8SF_FTYPE_V8SI_V8SF_QI:
37038 case V4DF_FTYPE_V4DF_V4DF_QI:
37039 case V4SF_FTYPE_V4SF_V4SF_QI:
37040 case V2DF_FTYPE_V2DF_V2DF_QI:
37041 case V2DF_FTYPE_V4SF_V2DF_QI:
37042 case V2DF_FTYPE_V4SI_V2DF_QI:
37043 case V4SF_FTYPE_V4SI_V4SF_QI:
37044 case V4DF_FTYPE_V4SF_V4DF_QI:
37045 case V4DF_FTYPE_V4SI_V4DF_QI:
37046 case V8SI_FTYPE_V8SI_V8SI_QI:
37047 case V8SI_FTYPE_V8HI_V8SI_QI:
37048 case V8SI_FTYPE_V16QI_V8SI_QI:
37049 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37050 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37051 case V8DF_FTYPE_V8SF_V8DF_QI:
37052 case V8DF_FTYPE_V8SI_V8DF_QI:
37053 case V8DI_FTYPE_DI_V8DI_QI:
37054 case V16SF_FTYPE_V8SF_V16SF_HI:
37055 case V16SI_FTYPE_V8SI_V16SI_HI:
37056 case V16HI_FTYPE_V16HI_V16HI_HI:
37057 case V8HI_FTYPE_V16QI_V8HI_QI:
37058 case V16HI_FTYPE_V16QI_V16HI_HI:
37059 case V32HI_FTYPE_V32HI_V32HI_SI:
37060 case V32HI_FTYPE_V32QI_V32HI_SI:
37061 case V8DI_FTYPE_V16QI_V8DI_QI:
37062 case V8DI_FTYPE_V2DI_V8DI_QI:
37063 case V8DI_FTYPE_V4DI_V8DI_QI:
37064 case V8DI_FTYPE_V8DI_V8DI_QI:
37065 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37066 case V8DI_FTYPE_V8HI_V8DI_QI:
37067 case V8DI_FTYPE_V8SI_V8DI_QI:
37068 case V8HI_FTYPE_V8DI_V8HI_QI:
37069 case V8SF_FTYPE_V8DF_V8SF_QI:
37070 case V8SI_FTYPE_V8DF_V8SI_QI:
37071 case V8SI_FTYPE_V8DI_V8SI_QI:
37072 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37073 nargs = 3;
37074 break;
37075 case V32QI_FTYPE_V32QI_V32QI_INT:
37076 case V16HI_FTYPE_V16HI_V16HI_INT:
37077 case V16QI_FTYPE_V16QI_V16QI_INT:
37078 case V4DI_FTYPE_V4DI_V4DI_INT:
37079 case V8HI_FTYPE_V8HI_V8HI_INT:
37080 case V8SI_FTYPE_V8SI_V8SI_INT:
37081 case V8SI_FTYPE_V8SI_V4SI_INT:
37082 case V8SF_FTYPE_V8SF_V8SF_INT:
37083 case V8SF_FTYPE_V8SF_V4SF_INT:
37084 case V4SI_FTYPE_V4SI_V4SI_INT:
37085 case V4DF_FTYPE_V4DF_V4DF_INT:
37086 case V16SF_FTYPE_V16SF_V16SF_INT:
37087 case V16SF_FTYPE_V16SF_V4SF_INT:
37088 case V16SI_FTYPE_V16SI_V4SI_INT:
37089 case V4DF_FTYPE_V4DF_V2DF_INT:
37090 case V4SF_FTYPE_V4SF_V4SF_INT:
37091 case V2DI_FTYPE_V2DI_V2DI_INT:
37092 case V4DI_FTYPE_V4DI_V2DI_INT:
37093 case V2DF_FTYPE_V2DF_V2DF_INT:
37094 case QI_FTYPE_V8DI_V8DI_INT:
37095 case QI_FTYPE_V8DF_V8DF_INT:
37096 case QI_FTYPE_V2DF_V2DF_INT:
37097 case QI_FTYPE_V4SF_V4SF_INT:
37098 case HI_FTYPE_V16SI_V16SI_INT:
37099 case HI_FTYPE_V16SF_V16SF_INT:
37100 nargs = 3;
37101 nargs_constant = 1;
37102 break;
37103 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37104 nargs = 3;
37105 rmode = V4DImode;
37106 nargs_constant = 1;
37107 break;
37108 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37109 nargs = 3;
37110 rmode = V2DImode;
37111 nargs_constant = 1;
37112 break;
37113 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37114 nargs = 3;
37115 rmode = DImode;
37116 nargs_constant = 1;
37117 break;
37118 case V2DI_FTYPE_V2DI_UINT_UINT:
37119 nargs = 3;
37120 nargs_constant = 2;
37121 break;
37122 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37123 nargs = 3;
37124 rmode = V8DImode;
37125 nargs_constant = 1;
37126 break;
37127 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37128 nargs = 5;
37129 rmode = V8DImode;
37130 mask_pos = 2;
37131 nargs_constant = 1;
37132 break;
37133 case QI_FTYPE_V8DF_INT_QI:
37134 case QI_FTYPE_V4DF_INT_QI:
37135 case QI_FTYPE_V2DF_INT_QI:
37136 case HI_FTYPE_V16SF_INT_HI:
37137 case QI_FTYPE_V8SF_INT_QI:
37138 case QI_FTYPE_V4SF_INT_QI:
37139 nargs = 3;
37140 mask_pos = 1;
37141 nargs_constant = 1;
37142 break;
37143 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37144 nargs = 5;
37145 rmode = V4DImode;
37146 mask_pos = 2;
37147 nargs_constant = 1;
37148 break;
37149 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37150 nargs = 5;
37151 rmode = V2DImode;
37152 mask_pos = 2;
37153 nargs_constant = 1;
37154 break;
37155 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37156 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37157 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37158 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37159 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37160 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37161 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37162 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37163 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37164 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37165 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37166 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37167 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37168 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37169 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37170 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37171 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37172 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37173 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37174 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37175 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37176 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37177 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37178 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37179 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37180 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37181 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37182 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37183 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37184 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37185 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37186 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37187 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37188 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37189 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37190 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37191 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37192 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37193 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37194 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37195 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37196 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37197 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37198 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37199 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37200 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37201 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37202 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37203 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37204 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37205 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37206 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37207 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37208 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37209 nargs = 4;
37210 break;
37211 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37212 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37213 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37214 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37215 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37216 nargs = 4;
37217 nargs_constant = 1;
37218 break;
37219 case QI_FTYPE_V4DI_V4DI_INT_QI:
37220 case QI_FTYPE_V8SI_V8SI_INT_QI:
37221 case QI_FTYPE_V4DF_V4DF_INT_QI:
37222 case QI_FTYPE_V8SF_V8SF_INT_QI:
37223 case QI_FTYPE_V2DI_V2DI_INT_QI:
37224 case QI_FTYPE_V4SI_V4SI_INT_QI:
37225 case QI_FTYPE_V2DF_V2DF_INT_QI:
37226 case QI_FTYPE_V4SF_V4SF_INT_QI:
37227 case DI_FTYPE_V64QI_V64QI_INT_DI:
37228 case SI_FTYPE_V32QI_V32QI_INT_SI:
37229 case HI_FTYPE_V16QI_V16QI_INT_HI:
37230 case SI_FTYPE_V32HI_V32HI_INT_SI:
37231 case HI_FTYPE_V16HI_V16HI_INT_HI:
37232 case QI_FTYPE_V8HI_V8HI_INT_QI:
37233 nargs = 4;
37234 mask_pos = 1;
37235 nargs_constant = 1;
37236 break;
37237 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37238 nargs = 4;
37239 nargs_constant = 2;
37240 break;
37241 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37242 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37243 nargs = 4;
37244 break;
37245 case QI_FTYPE_V8DI_V8DI_INT_QI:
37246 case HI_FTYPE_V16SI_V16SI_INT_HI:
37247 case QI_FTYPE_V8DF_V8DF_INT_QI:
37248 case HI_FTYPE_V16SF_V16SF_INT_HI:
37249 mask_pos = 1;
37250 nargs = 4;
37251 nargs_constant = 1;
37252 break;
37253 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37254 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37255 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37256 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37257 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37258 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37259 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37260 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37261 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37262 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37263 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37264 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37265 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37266 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37267 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37268 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37269 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37270 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37271 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37272 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37273 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37274 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37275 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37276 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37277 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37278 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37279 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37280 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37281 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37282 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37283 nargs = 4;
37284 mask_pos = 2;
37285 nargs_constant = 1;
37286 break;
37287 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37288 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37289 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37290 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37291 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37292 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37293 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37294 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37295 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37296 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37297 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37298 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37299 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37300 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37301 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37302 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37303 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37304 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37305 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37306 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37307 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37308 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37309 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37310 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37311 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37312 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37313 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37314 nargs = 5;
37315 mask_pos = 2;
37316 nargs_constant = 1;
37317 break;
37318 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37319 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37320 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37321 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37322 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37323 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37324 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37325 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37326 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37327 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37328 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37329 nargs = 5;
37331 mask_pos = 1;
37332 nargs_constant = 1;
37333 break;
37335 default:
37336 gcc_unreachable ();
37339 gcc_assert (nargs <= ARRAY_SIZE (args));
37341 if (comparison != UNKNOWN)
37343 gcc_assert (nargs == 2);
37344 return ix86_expand_sse_compare (d, exp, target, swap);
37347 if (rmode == VOIDmode || rmode == tmode)
37349 if (optimize
37350 || target == 0
37351 || GET_MODE (target) != tmode
37352 || !insn_p->operand[0].predicate (target, tmode))
37353 target = gen_reg_rtx (tmode);
37354 real_target = target;
37356 else
37358 real_target = gen_reg_rtx (tmode);
37359 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37362 for (i = 0; i < nargs; i++)
37364 tree arg = CALL_EXPR_ARG (exp, i);
37365 rtx op = expand_normal (arg);
37366 machine_mode mode = insn_p->operand[i + 1].mode;
37367 bool match = insn_p->operand[i + 1].predicate (op, mode);
37369 if (last_arg_count && (i + 1) == nargs)
37371 /* SIMD shift insns take either an 8-bit immediate or a
37372 register as the count, but the builtin functions take an int.
37373 If the count operand doesn't match, put it in a register. */
37374 if (!match)
37376 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37377 if (!insn_p->operand[i + 1].predicate (op, mode))
37378 op = copy_to_reg (op);
37381 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37382 (!mask_pos && (nargs - i) <= nargs_constant))
37384 if (!match)
37385 switch (icode)
37387 case CODE_FOR_avx_vinsertf128v4di:
37388 case CODE_FOR_avx_vextractf128v4di:
37389 error ("the last argument must be a 1-bit immediate");
37390 return const0_rtx;
37392 case CODE_FOR_avx512f_cmpv8di3_mask:
37393 case CODE_FOR_avx512f_cmpv16si3_mask:
37394 case CODE_FOR_avx512f_ucmpv8di3_mask:
37395 case CODE_FOR_avx512f_ucmpv16si3_mask:
37396 case CODE_FOR_avx512vl_cmpv4di3_mask:
37397 case CODE_FOR_avx512vl_cmpv8si3_mask:
37398 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37399 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37400 case CODE_FOR_avx512vl_cmpv2di3_mask:
37401 case CODE_FOR_avx512vl_cmpv4si3_mask:
37402 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37403 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37404 error ("the last argument must be a 3-bit immediate");
37405 return const0_rtx;
37407 case CODE_FOR_sse4_1_roundsd:
37408 case CODE_FOR_sse4_1_roundss:
37410 case CODE_FOR_sse4_1_roundpd:
37411 case CODE_FOR_sse4_1_roundps:
37412 case CODE_FOR_avx_roundpd256:
37413 case CODE_FOR_avx_roundps256:
37415 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37416 case CODE_FOR_sse4_1_roundps_sfix:
37417 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37418 case CODE_FOR_avx_roundps_sfix256:
37420 case CODE_FOR_sse4_1_blendps:
37421 case CODE_FOR_avx_blendpd256:
37422 case CODE_FOR_avx_vpermilv4df:
37423 case CODE_FOR_avx_vpermilv4df_mask:
37424 case CODE_FOR_avx512f_getmantv8df_mask:
37425 case CODE_FOR_avx512f_getmantv16sf_mask:
37426 case CODE_FOR_avx512vl_getmantv8sf_mask:
37427 case CODE_FOR_avx512vl_getmantv4df_mask:
37428 case CODE_FOR_avx512vl_getmantv4sf_mask:
37429 case CODE_FOR_avx512vl_getmantv2df_mask:
37430 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37431 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37432 case CODE_FOR_avx512dq_rangepv4df_mask:
37433 case CODE_FOR_avx512dq_rangepv8sf_mask:
37434 case CODE_FOR_avx512dq_rangepv2df_mask:
37435 case CODE_FOR_avx512dq_rangepv4sf_mask:
37436 case CODE_FOR_avx_shufpd256_mask:
37437 error ("the last argument must be a 4-bit immediate");
37438 return const0_rtx;
37440 case CODE_FOR_sha1rnds4:
37441 case CODE_FOR_sse4_1_blendpd:
37442 case CODE_FOR_avx_vpermilv2df:
37443 case CODE_FOR_avx_vpermilv2df_mask:
37444 case CODE_FOR_xop_vpermil2v2df3:
37445 case CODE_FOR_xop_vpermil2v4sf3:
37446 case CODE_FOR_xop_vpermil2v4df3:
37447 case CODE_FOR_xop_vpermil2v8sf3:
37448 case CODE_FOR_avx512f_vinsertf32x4_mask:
37449 case CODE_FOR_avx512f_vinserti32x4_mask:
37450 case CODE_FOR_avx512f_vextractf32x4_mask:
37451 case CODE_FOR_avx512f_vextracti32x4_mask:
37452 case CODE_FOR_sse2_shufpd:
37453 case CODE_FOR_sse2_shufpd_mask:
37454 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37455 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37456 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37457 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37458 error ("the last argument must be a 2-bit immediate");
37459 return const0_rtx;
37461 case CODE_FOR_avx_vextractf128v4df:
37462 case CODE_FOR_avx_vextractf128v8sf:
37463 case CODE_FOR_avx_vextractf128v8si:
37464 case CODE_FOR_avx_vinsertf128v4df:
37465 case CODE_FOR_avx_vinsertf128v8sf:
37466 case CODE_FOR_avx_vinsertf128v8si:
37467 case CODE_FOR_avx512f_vinsertf64x4_mask:
37468 case CODE_FOR_avx512f_vinserti64x4_mask:
37469 case CODE_FOR_avx512f_vextractf64x4_mask:
37470 case CODE_FOR_avx512f_vextracti64x4_mask:
37471 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37472 case CODE_FOR_avx512dq_vinserti32x8_mask:
37473 case CODE_FOR_avx512vl_vinsertv4df:
37474 case CODE_FOR_avx512vl_vinsertv4di:
37475 case CODE_FOR_avx512vl_vinsertv8sf:
37476 case CODE_FOR_avx512vl_vinsertv8si:
37477 error ("the last argument must be a 1-bit immediate");
37478 return const0_rtx;
37480 case CODE_FOR_avx_vmcmpv2df3:
37481 case CODE_FOR_avx_vmcmpv4sf3:
37482 case CODE_FOR_avx_cmpv2df3:
37483 case CODE_FOR_avx_cmpv4sf3:
37484 case CODE_FOR_avx_cmpv4df3:
37485 case CODE_FOR_avx_cmpv8sf3:
37486 case CODE_FOR_avx512f_cmpv8df3_mask:
37487 case CODE_FOR_avx512f_cmpv16sf3_mask:
37488 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37489 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37490 error ("the last argument must be a 5-bit immediate");
37491 return const0_rtx;
37493 default:
37494 switch (nargs_constant)
37496 case 2:
37497 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37498 (!mask_pos && (nargs - i) == nargs_constant))
37500 error ("the next to last argument must be an 8-bit immediate");
37501 break;
37503 case 1:
37504 error ("the last argument must be an 8-bit immediate");
37505 break;
37506 default:
37507 gcc_unreachable ();
37509 return const0_rtx;
37512 else
37514 if (VECTOR_MODE_P (mode))
37515 op = safe_vector_operand (op, mode);
37517 /* If we aren't optimizing, only allow one memory operand to
37518 be generated. */
37519 if (memory_operand (op, mode))
37520 num_memory++;
37522 op = fixup_modeless_constant (op, mode);
37524 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37526 if (optimize || !match || num_memory > 1)
37527 op = copy_to_mode_reg (mode, op);
37529 else
37531 op = copy_to_reg (op);
37532 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37536 args[i].op = op;
37537 args[i].mode = mode;
37540 switch (nargs)
37542 case 1:
37543 pat = GEN_FCN (icode) (real_target, args[0].op);
37544 break;
37545 case 2:
37546 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37547 break;
37548 case 3:
37549 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37550 args[2].op);
37551 break;
37552 case 4:
37553 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37554 args[2].op, args[3].op);
37555 break;
37556 case 5:
37557 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37558 args[2].op, args[3].op, args[4].op);
break;
37559 case 6:
37560 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37561 args[2].op, args[3].op, args[4].op,
37562 args[5].op);
37563 break;
37564 default:
37565 gcc_unreachable ();
37568 if (! pat)
37569 return 0;
37571 emit_insn (pat);
37572 return target;
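/* Worked example of the operand classification in the loop above
   (illustrative): for a descriptor of type QI_FTYPE_V4DI_V4DI_INT_QI we
   get nargs == 4, mask_pos == 1 and nargs_constant == 1, so the
   immediate test (nargs - i - mask_pos) == nargs_constant fires only for
   i == 2, i.e. the INT just before the trailing mask, and the mask is
   loaded like any other operand.  With mask_pos == 0 the trailing
   nargs_constant arguments are the ones required to be immediates.  */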
37575 /* Transform a pattern of the following layout:
37576 (parallel [
37577 set (A B)
37578 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37580 into:
37581 (set (A B))
or a pattern of the layout:
37584 (parallel [ A B
...
37586 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
... ])
37589 into:
37590 (parallel [ A B ... ]) */
37592 static rtx
37593 ix86_erase_embedded_rounding (rtx pat)
37595 if (GET_CODE (pat) == INSN)
37596 pat = PATTERN (pat);
37598 gcc_assert (GET_CODE (pat) == PARALLEL);
37600 if (XVECLEN (pat, 0) == 2)
37602 rtx p0 = XVECEXP (pat, 0, 0);
37603 rtx p1 = XVECEXP (pat, 0, 1);
37605 gcc_assert (GET_CODE (p0) == SET
37606 && GET_CODE (p1) == UNSPEC
37607 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37609 return p0;
37611 else
37613 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37614 int i = 0;
37615 int j = 0;
37617 for (; i < XVECLEN (pat, 0); ++i)
37619 rtx elem = XVECEXP (pat, 0, i);
37620 if (GET_CODE (elem) != UNSPEC
37621 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37622 res [j++] = elem;
37625 /* No more than 1 occurrence was removed. */
37626 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37628 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
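/* ix86_erase_embedded_rounding is only invoked (see the comi and round
   expanders below) after the rounding immediate has been found to be
   NO_ROUND; at that point the UNSPEC_EMBEDDED_ROUNDING element carries
   no information, and stripping it lets the emitted insn match the
   ordinary non-rounding pattern.  */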
37632 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37633 with rounding. */
37634 static rtx
37635 ix86_expand_sse_comi_round (const struct builtin_description *d,
37636 tree exp, rtx target)
37638 rtx pat, set_dst;
37639 tree arg0 = CALL_EXPR_ARG (exp, 0);
37640 tree arg1 = CALL_EXPR_ARG (exp, 1);
37641 tree arg2 = CALL_EXPR_ARG (exp, 2);
37642 tree arg3 = CALL_EXPR_ARG (exp, 3);
37643 rtx op0 = expand_normal (arg0);
37644 rtx op1 = expand_normal (arg1);
37645 rtx op2 = expand_normal (arg2);
37646 rtx op3 = expand_normal (arg3);
37647 enum insn_code icode = d->icode;
37648 const struct insn_data_d *insn_p = &insn_data[icode];
37649 machine_mode mode0 = insn_p->operand[0].mode;
37650 machine_mode mode1 = insn_p->operand[1].mode;
37651 enum rtx_code comparison = UNEQ;
37652 bool need_ucomi = false;
37654 /* See avxintrin.h for values. */
37655 enum rtx_code comi_comparisons[32] =
37657 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37658 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37659 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37661 bool need_ucomi_values[32] =
37663 true, false, false, true, true, false, false, true,
37664 true, false, false, true, true, false, false, true,
37665 false, true, true, false, false, true, true, false,
37666 false, true, true, false, false, true, true, false
37669 if (!CONST_INT_P (op2))
37671 error ("the third argument must be a comparison constant");
37672 return const0_rtx;
37674 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37676 error ("incorrect comparison mode");
37677 return const0_rtx;
37680 if (!insn_p->operand[2].predicate (op3, SImode))
37682 error ("incorrect rounding operand");
37683 return const0_rtx;
37686 comparison = comi_comparisons[INTVAL (op2)];
37687 need_ucomi = need_ucomi_values[INTVAL (op2)];
37689 if (VECTOR_MODE_P (mode0))
37690 op0 = safe_vector_operand (op0, mode0);
37691 if (VECTOR_MODE_P (mode1))
37692 op1 = safe_vector_operand (op1, mode1);
37694 target = gen_reg_rtx (SImode);
37695 emit_move_insn (target, const0_rtx);
37696 target = gen_rtx_SUBREG (QImode, target, 0);
37698 if ((optimize && !register_operand (op0, mode0))
37699 || !insn_p->operand[0].predicate (op0, mode0))
37700 op0 = copy_to_mode_reg (mode0, op0);
37701 if ((optimize && !register_operand (op1, mode1))
37702 || !insn_p->operand[1].predicate (op1, mode1))
37703 op1 = copy_to_mode_reg (mode1, op1);
37705 if (need_ucomi)
37706 icode = icode == CODE_FOR_sse_comi_round
37707 ? CODE_FOR_sse_ucomi_round
37708 : CODE_FOR_sse2_ucomi_round;
37710 pat = GEN_FCN (icode) (op0, op1, op3);
37711 if (! pat)
37712 return 0;
37714 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37715 if (INTVAL (op3) == NO_ROUND)
37717 pat = ix86_erase_embedded_rounding (pat);
37718 if (! pat)
37719 return 0;
37721 set_dst = SET_DEST (pat);
37723 else
37725 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37726 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37729 emit_insn (pat);
37730 emit_insn (gen_rtx_SET (VOIDmode,
37731 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37732 gen_rtx_fmt_ee (comparison, QImode,
37733 set_dst,
37734 const0_rtx)));
37736 return SUBREG_REG (target);
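/* In the comi-with-rounding expander above, the third builtin argument
   (0..31, see the avxintrin.h predicate values referenced in the
   comment) indexes both tables at once: comi_comparisons[] gives the
   rtx code applied to the flags, and need_ucomi_values[] says whether
   the quiet (ucomi) form of the instruction must be used for that
   predicate.  The fourth argument is the rounding/SAE operand, checked
   against operand 2 of the pattern.  */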
37739 static rtx
37740 ix86_expand_round_builtin (const struct builtin_description *d,
37741 tree exp, rtx target)
37743 rtx pat;
37744 unsigned int i, nargs;
37745 struct
37747 rtx op;
37748 machine_mode mode;
37749 } args[6];
37750 enum insn_code icode = d->icode;
37751 const struct insn_data_d *insn_p = &insn_data[icode];
37752 machine_mode tmode = insn_p->operand[0].mode;
37753 unsigned int nargs_constant = 0;
37754 unsigned int redundant_embed_rnd = 0;
37756 switch ((enum ix86_builtin_func_type) d->flag)
37758 case UINT64_FTYPE_V2DF_INT:
37759 case UINT64_FTYPE_V4SF_INT:
37760 case UINT_FTYPE_V2DF_INT:
37761 case UINT_FTYPE_V4SF_INT:
37762 case INT64_FTYPE_V2DF_INT:
37763 case INT64_FTYPE_V4SF_INT:
37764 case INT_FTYPE_V2DF_INT:
37765 case INT_FTYPE_V4SF_INT:
37766 nargs = 2;
37767 break;
37768 case V4SF_FTYPE_V4SF_UINT_INT:
37769 case V4SF_FTYPE_V4SF_UINT64_INT:
37770 case V2DF_FTYPE_V2DF_UINT64_INT:
37771 case V4SF_FTYPE_V4SF_INT_INT:
37772 case V4SF_FTYPE_V4SF_INT64_INT:
37773 case V2DF_FTYPE_V2DF_INT64_INT:
37774 case V4SF_FTYPE_V4SF_V4SF_INT:
37775 case V2DF_FTYPE_V2DF_V2DF_INT:
37776 case V4SF_FTYPE_V4SF_V2DF_INT:
37777 case V2DF_FTYPE_V2DF_V4SF_INT:
37778 nargs = 3;
37779 break;
37780 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37781 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37782 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37783 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37784 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37785 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37786 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37787 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37788 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37789 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37790 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37791 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37792 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37793 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37794 nargs = 4;
37795 break;
37796 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37797 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37798 nargs_constant = 2;
37799 nargs = 4;
37800 break;
37801 case INT_FTYPE_V4SF_V4SF_INT_INT:
37802 case INT_FTYPE_V2DF_V2DF_INT_INT:
37803 return ix86_expand_sse_comi_round (d, exp, target);
37804 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37805 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37806 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37807 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37808 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37809 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37810 nargs = 5;
37811 break;
37812 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37813 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37814 nargs_constant = 4;
37815 nargs = 5;
37816 break;
37817 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37818 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37819 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37820 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37821 nargs_constant = 3;
37822 nargs = 5;
37823 break;
37824 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37825 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37826 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37827 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37828 nargs = 6;
37829 nargs_constant = 4;
37830 break;
37831 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37832 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37833 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37834 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37835 nargs = 6;
37836 nargs_constant = 3;
37837 break;
37838 default:
37839 gcc_unreachable ();
37841 gcc_assert (nargs <= ARRAY_SIZE (args));
37843 if (optimize
37844 || target == 0
37845 || GET_MODE (target) != tmode
37846 || !insn_p->operand[0].predicate (target, tmode))
37847 target = gen_reg_rtx (tmode);
37849 for (i = 0; i < nargs; i++)
37851 tree arg = CALL_EXPR_ARG (exp, i);
37852 rtx op = expand_normal (arg);
37853 machine_mode mode = insn_p->operand[i + 1].mode;
37854 bool match = insn_p->operand[i + 1].predicate (op, mode);
37856 if (i == nargs - nargs_constant)
37858 if (!match)
37860 switch (icode)
37862 case CODE_FOR_avx512f_getmantv8df_mask_round:
37863 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37864 case CODE_FOR_avx512f_vgetmantv2df_round:
37865 case CODE_FOR_avx512f_vgetmantv4sf_round:
37866 error ("the immediate argument must be a 4-bit immediate");
37867 return const0_rtx;
37868 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37869 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37870 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37871 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37872 error ("the immediate argument must be a 5-bit immediate");
37873 return const0_rtx;
37874 default:
37875 error ("the immediate argument must be an 8-bit immediate");
37876 return const0_rtx;
37880 else if (i == nargs-1)
37882 if (!insn_p->operand[nargs].predicate (op, SImode))
37884 error ("incorrect rounding operand");
37885 return const0_rtx;
37888 /* If there is no rounding, use the normal version of the pattern. */
37889 if (INTVAL (op) == NO_ROUND)
37890 redundant_embed_rnd = 1;
37892 else
37894 if (VECTOR_MODE_P (mode))
37895 op = safe_vector_operand (op, mode);
37897 op = fixup_modeless_constant (op, mode);
37899 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37901 if (optimize || !match)
37902 op = copy_to_mode_reg (mode, op);
37904 else
37906 op = copy_to_reg (op);
37907 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37911 args[i].op = op;
37912 args[i].mode = mode;
37915 switch (nargs)
37917 case 1:
37918 pat = GEN_FCN (icode) (target, args[0].op);
37919 break;
37920 case 2:
37921 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37922 break;
37923 case 3:
37924 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37925 args[2].op);
37926 break;
37927 case 4:
37928 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37929 args[2].op, args[3].op);
37930 break;
37931 case 5:
37932 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37933 args[2].op, args[3].op, args[4].op);
break;
37934 case 6:
37935 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37936 args[2].op, args[3].op, args[4].op,
37937 args[5].op);
37938 break;
37939 default:
37940 gcc_unreachable ();
37943 if (!pat)
37944 return 0;
37946 if (redundant_embed_rnd)
37947 pat = ix86_erase_embedded_rounding (pat);
37949 emit_insn (pat);
37950 return target;
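/* Conventions used by ix86_expand_round_builtin above: the last builtin
   argument is always the rounding/SAE operand (checked against operand
   nargs of the pattern), a non-rounding immediate, if any, sits at
   position nargs - nargs_constant, and a NO_ROUND value makes the
   embedded-rounding unspec redundant, in which case it is erased before
   the insn is emitted.  */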
37953 /* Subroutine of ix86_expand_builtin to take care of special insns
37954 with variable number of operands. */
37956 static rtx
37957 ix86_expand_special_args_builtin (const struct builtin_description *d,
37958 tree exp, rtx target)
37960 tree arg;
37961 rtx pat, op;
37962 unsigned int i, nargs, arg_adjust, memory;
37963 bool aligned_mem = false;
37964 struct
37966 rtx op;
37967 machine_mode mode;
37968 } args[3];
37969 enum insn_code icode = d->icode;
37970 bool last_arg_constant = false;
37971 const struct insn_data_d *insn_p = &insn_data[icode];
37972 machine_mode tmode = insn_p->operand[0].mode;
37973 enum { load, store } klass;
37975 switch ((enum ix86_builtin_func_type) d->flag)
37977 case VOID_FTYPE_VOID:
37978 emit_insn (GEN_FCN (icode) (target));
37979 return 0;
37980 case VOID_FTYPE_UINT64:
37981 case VOID_FTYPE_UNSIGNED:
37982 nargs = 0;
37983 klass = store;
37984 memory = 0;
37985 break;
37987 case INT_FTYPE_VOID:
37988 case USHORT_FTYPE_VOID:
37989 case UINT64_FTYPE_VOID:
37990 case UNSIGNED_FTYPE_VOID:
37991 nargs = 0;
37992 klass = load;
37993 memory = 0;
37994 break;
37995 case UINT64_FTYPE_PUNSIGNED:
37996 case V2DI_FTYPE_PV2DI:
37997 case V4DI_FTYPE_PV4DI:
37998 case V32QI_FTYPE_PCCHAR:
37999 case V16QI_FTYPE_PCCHAR:
38000 case V8SF_FTYPE_PCV4SF:
38001 case V8SF_FTYPE_PCFLOAT:
38002 case V4SF_FTYPE_PCFLOAT:
38003 case V4DF_FTYPE_PCV2DF:
38004 case V4DF_FTYPE_PCDOUBLE:
38005 case V2DF_FTYPE_PCDOUBLE:
38006 case VOID_FTYPE_PVOID:
38007 case V16SI_FTYPE_PV4SI:
38008 case V16SF_FTYPE_PV4SF:
38009 case V8DI_FTYPE_PV4DI:
38010 case V8DI_FTYPE_PV8DI:
38011 case V8DF_FTYPE_PV4DF:
38012 nargs = 1;
38013 klass = load;
38014 memory = 0;
38015 switch (icode)
38017 case CODE_FOR_sse4_1_movntdqa:
38018 case CODE_FOR_avx2_movntdqa:
38019 case CODE_FOR_avx512f_movntdqa:
38020 aligned_mem = true;
38021 break;
38022 default:
38023 break;
38025 break;
38026 case VOID_FTYPE_PV2SF_V4SF:
38027 case VOID_FTYPE_PV8DI_V8DI:
38028 case VOID_FTYPE_PV4DI_V4DI:
38029 case VOID_FTYPE_PV2DI_V2DI:
38030 case VOID_FTYPE_PCHAR_V32QI:
38031 case VOID_FTYPE_PCHAR_V16QI:
38032 case VOID_FTYPE_PFLOAT_V16SF:
38033 case VOID_FTYPE_PFLOAT_V8SF:
38034 case VOID_FTYPE_PFLOAT_V4SF:
38035 case VOID_FTYPE_PDOUBLE_V8DF:
38036 case VOID_FTYPE_PDOUBLE_V4DF:
38037 case VOID_FTYPE_PDOUBLE_V2DF:
38038 case VOID_FTYPE_PLONGLONG_LONGLONG:
38039 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38040 case VOID_FTYPE_PINT_INT:
38041 nargs = 1;
38042 klass = store;
38043 /* Reserve memory operand for target. */
38044 memory = ARRAY_SIZE (args);
38045 switch (icode)
38047 /* These builtins and instructions require the memory
38048 to be properly aligned. */
38049 case CODE_FOR_avx_movntv4di:
38050 case CODE_FOR_sse2_movntv2di:
38051 case CODE_FOR_avx_movntv8sf:
38052 case CODE_FOR_sse_movntv4sf:
38053 case CODE_FOR_sse4a_vmmovntv4sf:
38054 case CODE_FOR_avx_movntv4df:
38055 case CODE_FOR_sse2_movntv2df:
38056 case CODE_FOR_sse4a_vmmovntv2df:
38057 case CODE_FOR_sse2_movntidi:
38058 case CODE_FOR_sse_movntq:
38059 case CODE_FOR_sse2_movntisi:
38060 case CODE_FOR_avx512f_movntv16sf:
38061 case CODE_FOR_avx512f_movntv8df:
38062 case CODE_FOR_avx512f_movntv8di:
38063 aligned_mem = true;
38064 break;
38065 default:
38066 break;
38068 break;
38069 case V4SF_FTYPE_V4SF_PCV2SF:
38070 case V2DF_FTYPE_V2DF_PCDOUBLE:
38071 nargs = 2;
38072 klass = load;
38073 memory = 1;
38074 break;
38075 case V8SF_FTYPE_PCV8SF_V8SI:
38076 case V4DF_FTYPE_PCV4DF_V4DI:
38077 case V4SF_FTYPE_PCV4SF_V4SI:
38078 case V2DF_FTYPE_PCV2DF_V2DI:
38079 case V8SI_FTYPE_PCV8SI_V8SI:
38080 case V4DI_FTYPE_PCV4DI_V4DI:
38081 case V4SI_FTYPE_PCV4SI_V4SI:
38082 case V2DI_FTYPE_PCV2DI_V2DI:
38083 nargs = 2;
38084 klass = load;
38085 memory = 0;
38086 break;
38087 case VOID_FTYPE_PV8DF_V8DF_QI:
38088 case VOID_FTYPE_PV16SF_V16SF_HI:
38089 case VOID_FTYPE_PV8DI_V8DI_QI:
38090 case VOID_FTYPE_PV4DI_V4DI_QI:
38091 case VOID_FTYPE_PV2DI_V2DI_QI:
38092 case VOID_FTYPE_PV16SI_V16SI_HI:
38093 case VOID_FTYPE_PV8SI_V8SI_QI:
38094 case VOID_FTYPE_PV4SI_V4SI_QI:
38095 switch (icode)
38097 /* These builtins and instructions require the memory
38098 to be properly aligned. */
38099 case CODE_FOR_avx512f_storev16sf_mask:
38100 case CODE_FOR_avx512f_storev16si_mask:
38101 case CODE_FOR_avx512f_storev8df_mask:
38102 case CODE_FOR_avx512f_storev8di_mask:
38103 case CODE_FOR_avx512vl_storev8sf_mask:
38104 case CODE_FOR_avx512vl_storev8si_mask:
38105 case CODE_FOR_avx512vl_storev4df_mask:
38106 case CODE_FOR_avx512vl_storev4di_mask:
38107 case CODE_FOR_avx512vl_storev4sf_mask:
38108 case CODE_FOR_avx512vl_storev4si_mask:
38109 case CODE_FOR_avx512vl_storev2df_mask:
38110 case CODE_FOR_avx512vl_storev2di_mask:
38111 aligned_mem = true;
38112 break;
38113 default:
38114 break;
38116 /* FALLTHRU */
38117 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38118 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38119 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38120 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38121 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38122 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38123 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38124 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38125 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38126 case VOID_FTYPE_PFLOAT_V4SF_QI:
38127 case VOID_FTYPE_PV8SI_V8DI_QI:
38128 case VOID_FTYPE_PV8HI_V8DI_QI:
38129 case VOID_FTYPE_PV16HI_V16SI_HI:
38130 case VOID_FTYPE_PV16QI_V8DI_QI:
38131 case VOID_FTYPE_PV16QI_V16SI_HI:
38132 case VOID_FTYPE_PV4SI_V4DI_QI:
38133 case VOID_FTYPE_PV4SI_V2DI_QI:
38134 case VOID_FTYPE_PV8HI_V4DI_QI:
38135 case VOID_FTYPE_PV8HI_V2DI_QI:
38136 case VOID_FTYPE_PV8HI_V8SI_QI:
38137 case VOID_FTYPE_PV8HI_V4SI_QI:
38138 case VOID_FTYPE_PV16QI_V4DI_QI:
38139 case VOID_FTYPE_PV16QI_V2DI_QI:
38140 case VOID_FTYPE_PV16QI_V8SI_QI:
38141 case VOID_FTYPE_PV16QI_V4SI_QI:
38142 case VOID_FTYPE_PV8HI_V8HI_QI:
38143 case VOID_FTYPE_PV16HI_V16HI_HI:
38144 case VOID_FTYPE_PV32HI_V32HI_SI:
38145 case VOID_FTYPE_PV16QI_V16QI_HI:
38146 case VOID_FTYPE_PV32QI_V32QI_SI:
38147 case VOID_FTYPE_PV64QI_V64QI_DI:
38148 case VOID_FTYPE_PV4DF_V4DF_QI:
38149 case VOID_FTYPE_PV2DF_V2DF_QI:
38150 case VOID_FTYPE_PV8SF_V8SF_QI:
38151 case VOID_FTYPE_PV4SF_V4SF_QI:
38152 nargs = 2;
38153 klass = store;
38154 /* Reserve memory operand for target. */
38155 memory = ARRAY_SIZE (args);
38156 break;
38157 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38158 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38159 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38160 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38161 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38162 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38163 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38164 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38165 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38166 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38167 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38168 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38169 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38170 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38171 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38172 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38173 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38174 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38175 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38176 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38177 nargs = 3;
38178 klass = load;
38179 memory = 0;
38180 switch (icode)
38182 /* These builtins and instructions require the memory
38183 to be properly aligned. */
38184 case CODE_FOR_avx512f_loadv16sf_mask:
38185 case CODE_FOR_avx512f_loadv16si_mask:
38186 case CODE_FOR_avx512f_loadv8df_mask:
38187 case CODE_FOR_avx512f_loadv8di_mask:
38188 case CODE_FOR_avx512vl_loadv8sf_mask:
38189 case CODE_FOR_avx512vl_loadv8si_mask:
38190 case CODE_FOR_avx512vl_loadv4df_mask:
38191 case CODE_FOR_avx512vl_loadv4di_mask:
38192 case CODE_FOR_avx512vl_loadv4sf_mask:
38193 case CODE_FOR_avx512vl_loadv4si_mask:
38194 case CODE_FOR_avx512vl_loadv2df_mask:
38195 case CODE_FOR_avx512vl_loadv2di_mask:
38196 case CODE_FOR_avx512bw_loadv64qi_mask:
38197 case CODE_FOR_avx512vl_loadv32qi_mask:
38198 case CODE_FOR_avx512vl_loadv16qi_mask:
38199 case CODE_FOR_avx512bw_loadv32hi_mask:
38200 case CODE_FOR_avx512vl_loadv16hi_mask:
38201 case CODE_FOR_avx512vl_loadv8hi_mask:
38202 aligned_mem = true;
38203 break;
38204 default:
38205 break;
38207 break;
38208 case VOID_FTYPE_UINT_UINT_UINT:
38209 case VOID_FTYPE_UINT64_UINT_UINT:
38210 case UCHAR_FTYPE_UINT_UINT_UINT:
38211 case UCHAR_FTYPE_UINT64_UINT_UINT:
38212 nargs = 3;
38213 klass = load;
38214 memory = ARRAY_SIZE (args);
38215 last_arg_constant = true;
38216 break;
38217 default:
38218 gcc_unreachable ();
38221 gcc_assert (nargs <= ARRAY_SIZE (args));
38223 if (klass == store)
38225 arg = CALL_EXPR_ARG (exp, 0);
38226 op = expand_normal (arg);
38227 gcc_assert (target == 0);
38228 if (memory)
38230 op = ix86_zero_extend_to_Pmode (op);
38231 target = gen_rtx_MEM (tmode, op);
38232 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38233 on it. Try to improve it using get_pointer_alignment,
38234 and if the special builtin is one that requires strict
38235 mode alignment, also from its GET_MODE_ALIGNMENT.
38236 Failure to do so could lead to ix86_legitimate_combined_insn
38237 rejecting all changes to such insns. */
38238 unsigned int align = get_pointer_alignment (arg);
38239 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38240 align = GET_MODE_ALIGNMENT (tmode);
38241 if (MEM_ALIGN (target) < align)
38242 set_mem_align (target, align);
38244 else
38245 target = force_reg (tmode, op);
38246 arg_adjust = 1;
38248 else
38250 arg_adjust = 0;
38251 if (optimize
38252 || target == 0
38253 || !register_operand (target, tmode)
38254 || GET_MODE (target) != tmode)
38255 target = gen_reg_rtx (tmode);
38258 for (i = 0; i < nargs; i++)
38260 machine_mode mode = insn_p->operand[i + 1].mode;
38261 bool match;
38263 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38264 op = expand_normal (arg);
38265 match = insn_p->operand[i + 1].predicate (op, mode);
38267 if (last_arg_constant && (i + 1) == nargs)
38269 if (!match)
38271 if (icode == CODE_FOR_lwp_lwpvalsi3
38272 || icode == CODE_FOR_lwp_lwpinssi3
38273 || icode == CODE_FOR_lwp_lwpvaldi3
38274 || icode == CODE_FOR_lwp_lwpinsdi3)
38275 error ("the last argument must be a 32-bit immediate");
38276 else
38277 error ("the last argument must be an 8-bit immediate");
38278 return const0_rtx;
38281 else
38283 if (i == memory)
38285 /* This must be the memory operand. */
38286 op = ix86_zero_extend_to_Pmode (op);
38287 op = gen_rtx_MEM (mode, op);
38288 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38289 on it. Try to improve it using get_pointer_alignment,
38290 and if the special builtin is one that requires strict
38291 mode alignment, also from its GET_MODE_ALIGNMENT.
38292 Failure to do so could lead to ix86_legitimate_combined_insn
38293 rejecting all changes to such insns. */
38294 unsigned int align = get_pointer_alignment (arg);
38295 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38296 align = GET_MODE_ALIGNMENT (mode);
38297 if (MEM_ALIGN (op) < align)
38298 set_mem_align (op, align);
38300 else
38302 /* This must be a register. */
38303 if (VECTOR_MODE_P (mode))
38304 op = safe_vector_operand (op, mode);
38306 op = fixup_modeless_constant (op, mode);
38308 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38309 op = copy_to_mode_reg (mode, op);
38310 else
38312 op = copy_to_reg (op);
38313 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38318 args[i].op = op;
38319 args[i].mode = mode;
38322 switch (nargs)
38324 case 0:
38325 pat = GEN_FCN (icode) (target);
38326 break;
38327 case 1:
38328 pat = GEN_FCN (icode) (target, args[0].op);
38329 break;
38330 case 2:
38331 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38332 break;
38333 case 3:
38334 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38335 break;
38336 default:
38337 gcc_unreachable ();
38340 if (! pat)
38341 return 0;
38342 emit_insn (pat);
38343 return klass == store ? 0 : target;
38346 /* Return the integer constant in ARG. Constrain it to be in the range
38347 of the subparts of VEC_TYPE; issue an error if not. */
38349 static int
38350 get_element_number (tree vec_type, tree arg)
38352 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38354 if (!tree_fits_uhwi_p (arg)
38355 || (elt = tree_to_uhwi (arg), elt > max))
38357 error ("selector must be an integer constant in the range 0..%wi", max);
38358 return 0;
38361 return elt;
38364 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38365 ix86_expand_vector_init. We DO have language-level syntax for this, in
38366 the form of (type){ init-list }. Except that since we can't place emms
38367 instructions from inside the compiler, we can't allow the use of MMX
38368 registers unless the user explicitly asks for it. So we do *not* define
38369 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38370 we have builtins invoked by mmintrin.h that give us license to emit
38371 these sorts of instructions. */
38373 static rtx
38374 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38376 machine_mode tmode = TYPE_MODE (type);
38377 machine_mode inner_mode = GET_MODE_INNER (tmode);
38378 int i, n_elt = GET_MODE_NUNITS (tmode);
38379 rtvec v = rtvec_alloc (n_elt);
38381 gcc_assert (VECTOR_MODE_P (tmode));
38382 gcc_assert (call_expr_nargs (exp) == n_elt);
38384 for (i = 0; i < n_elt; ++i)
38386 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38387 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38390 if (!target || !register_operand (target, tmode))
38391 target = gen_reg_rtx (tmode);
38393 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38394 return target;
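   /* Illustrative sketch, assuming the usual mmintrin.h intrinsic mapping:
      a user-level caller that reaches this expander through
      IX86_BUILTIN_VEC_INIT_V2SI would look roughly like

          #include <mmintrin.h>

          __m64
          pack_two_ints (int lo, int hi)
          {
            return _mm_set_pi32 (hi, lo);
          }

      Each scalar argument of the call is expanded, narrowed to the vector's
      inner mode and handed to ix86_expand_vector_init as a PARALLEL.  */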
38397 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38398 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38399 had a language-level syntax for referencing vector elements. */
38401 static rtx
38402 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38404 machine_mode tmode, mode0;
38405 tree arg0, arg1;
38406 int elt;
38407 rtx op0;
38409 arg0 = CALL_EXPR_ARG (exp, 0);
38410 arg1 = CALL_EXPR_ARG (exp, 1);
38412 op0 = expand_normal (arg0);
38413 elt = get_element_number (TREE_TYPE (arg0), arg1);
38415 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38416 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38417 gcc_assert (VECTOR_MODE_P (mode0));
38419 op0 = force_reg (mode0, op0);
38421 if (optimize || !target || !register_operand (target, tmode))
38422 target = gen_reg_rtx (tmode);
38424 ix86_expand_vector_extract (true, target, op0, elt);
38426 return target;
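   /* Illustrative sketch, assuming the usual emmintrin.h intrinsic mapping
      (__builtin_ia32_vec_ext_v8hi backing _mm_extract_epi16): a caller that
      reaches this expander would be roughly

          #include <emmintrin.h>

          int
          third_halfword (__m128i v)
          {
            return _mm_extract_epi16 (v, 2);
          }

      The selector (2 here) must be a constant; get_element_number rejects
      anything outside 0..nunits-1.  */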
38429 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38430 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38431 a language-level syntax for referencing vector elements. */
38433 static rtx
38434 ix86_expand_vec_set_builtin (tree exp)
38436 machine_mode tmode, mode1;
38437 tree arg0, arg1, arg2;
38438 int elt;
38439 rtx op0, op1, target;
38441 arg0 = CALL_EXPR_ARG (exp, 0);
38442 arg1 = CALL_EXPR_ARG (exp, 1);
38443 arg2 = CALL_EXPR_ARG (exp, 2);
38445 tmode = TYPE_MODE (TREE_TYPE (arg0));
38446 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38447 gcc_assert (VECTOR_MODE_P (tmode));
38449 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38450 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38451 elt = get_element_number (TREE_TYPE (arg0), arg2);
38453 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38454 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38456 op0 = force_reg (tmode, op0);
38457 op1 = force_reg (mode1, op1);
38459 /* OP0 is the source of these builtin functions and shouldn't be
38460 modified. Create a copy, use it and return it as target. */
38461 target = gen_reg_rtx (tmode);
38462 emit_move_insn (target, op0);
38463 ix86_expand_vector_set (true, target, op1, elt);
38465 return target;
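   /* Illustrative sketch, assuming the usual emmintrin.h intrinsic mapping
      (__builtin_ia32_vec_set_v8hi backing _mm_insert_epi16): a caller that
      reaches this expander would be roughly

          #include <emmintrin.h>

          __m128i
          replace_halfword (__m128i v, int x)
          {
            return _mm_insert_epi16 (v, x, 5);
          }

      The input vector is copied into a fresh register first, so the
      builtin's source operand is never modified in place.  */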
38468 /* Emit conditional move of SRC to DST with condition
38469 OP1 CODE OP2. */
38470 static void
38471 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38473 rtx t;
38475 if (TARGET_CMOVE)
38477 t = ix86_expand_compare (code, op1, op2);
38478 emit_insn (gen_rtx_SET (VOIDmode, dst,
38479 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38480 src, dst)));
38482 else
38484 rtx nomove = gen_label_rtx ();
38485 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38486 const0_rtx, GET_MODE (op1), 1, nomove);
38487 emit_move_insn (dst, src);
38488 emit_label (nomove);
38492 /* Choose the max of DST and SRC and put it in DST. */
38493 static void
38494 ix86_emit_move_max (rtx dst, rtx src)
38496 ix86_emit_cmove (dst, src, LTU, dst, src);
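   /* Illustrative note: in C terms the sequence above behaves roughly like
      the unsigned expression

          dst = dst < src ? src : dst;

      emitted either as a conditional move or, without TARGET_CMOVE, as a
      compare-and-branch around the plain move.  */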
38499 /* Expand an expression EXP that calls a built-in function,
38500 with result going to TARGET if that's convenient
38501 (and in mode MODE if that's convenient).
38502 SUBTARGET may be used as the target for computing one of EXP's operands.
38503 IGNORE is nonzero if the value is to be ignored. */
38505 static rtx
38506 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38507 machine_mode mode, int ignore)
38509 const struct builtin_description *d;
38510 size_t i;
38511 enum insn_code icode;
38512 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38513 tree arg0, arg1, arg2, arg3, arg4;
38514 rtx op0, op1, op2, op3, op4, pat, insn;
38515 machine_mode mode0, mode1, mode2, mode3, mode4;
38516 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38518 /* For CPU builtins that can be folded, fold first and expand the fold. */
38519 switch (fcode)
38521 case IX86_BUILTIN_CPU_INIT:
38523 /* Make it call __cpu_indicator_init in libgcc. */
38524 tree call_expr, fndecl, type;
38525 type = build_function_type_list (integer_type_node, NULL_TREE);
38526 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38527 call_expr = build_call_expr (fndecl, 0);
38528 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38530 case IX86_BUILTIN_CPU_IS:
38531 case IX86_BUILTIN_CPU_SUPPORTS:
38533 tree arg0 = CALL_EXPR_ARG (exp, 0);
38534 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38535 gcc_assert (fold_expr != NULL_TREE);
38536 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38540 /* Determine whether the builtin function is available under the current ISA.
38541 Originally the builtin was not created if it wasn't applicable to the
38542 current ISA based on the command line switches. With function specific
38543 options, we need to check in the context of the function making the call
38544 whether it is supported. */
38545 if (ix86_builtins_isa[fcode].isa
38546 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38548 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38549 NULL, (enum fpmath_unit) 0, false);
38551 if (!opts)
38552 error ("%qE needs unknown isa option", fndecl);
38553 else
38555 gcc_assert (opts != NULL);
38556 error ("%qE needs isa option %s", fndecl, opts);
38557 free (opts);
38559 return const0_rtx;
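   /* Illustrative note: with function specific options, calling e.g. an
      AVX2 builtin from a function compiled without AVX2 support reaches
      the branch above and produces a diagnostic of roughly the form

          error: '__builtin_ia32_gathersiv4df' needs isa option -mavx2

      where the option string comes from ix86_target_string.  */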
38562 switch (fcode)
38564 case IX86_BUILTIN_BNDMK:
38565 if (!target
38566 || GET_MODE (target) != BNDmode
38567 || !register_operand (target, BNDmode))
38568 target = gen_reg_rtx (BNDmode);
38570 arg0 = CALL_EXPR_ARG (exp, 0);
38571 arg1 = CALL_EXPR_ARG (exp, 1);
38573 op0 = expand_normal (arg0);
38574 op1 = expand_normal (arg1);
38576 if (!register_operand (op0, Pmode))
38577 op0 = ix86_zero_extend_to_Pmode (op0);
38578 if (!register_operand (op1, Pmode))
38579 op1 = ix86_zero_extend_to_Pmode (op1);
38581 /* Builtin arg1 is the size of the block, but instruction op1 should
38582 be (size - 1). */
38583 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38584 NULL_RTX, 1, OPTAB_DIRECT);
38586 emit_insn (BNDmode == BND64mode
38587 ? gen_bnd64_mk (target, op0, op1)
38588 : gen_bnd32_mk (target, op0, op1));
38589 return target;
38591 case IX86_BUILTIN_BNDSTX:
38592 arg0 = CALL_EXPR_ARG (exp, 0);
38593 arg1 = CALL_EXPR_ARG (exp, 1);
38594 arg2 = CALL_EXPR_ARG (exp, 2);
38596 op0 = expand_normal (arg0);
38597 op1 = expand_normal (arg1);
38598 op2 = expand_normal (arg2);
38600 if (!register_operand (op0, Pmode))
38601 op0 = ix86_zero_extend_to_Pmode (op0);
38602 if (!register_operand (op1, BNDmode))
38603 op1 = copy_to_mode_reg (BNDmode, op1);
38604 if (!register_operand (op2, Pmode))
38605 op2 = ix86_zero_extend_to_Pmode (op2);
38607 emit_insn (BNDmode == BND64mode
38608 ? gen_bnd64_stx (op2, op0, op1)
38609 : gen_bnd32_stx (op2, op0, op1));
38610 return 0;
38612 case IX86_BUILTIN_BNDLDX:
38613 if (!target
38614 || GET_MODE (target) != BNDmode
38615 || !register_operand (target, BNDmode))
38616 target = gen_reg_rtx (BNDmode);
38618 arg0 = CALL_EXPR_ARG (exp, 0);
38619 arg1 = CALL_EXPR_ARG (exp, 1);
38621 op0 = expand_normal (arg0);
38622 op1 = expand_normal (arg1);
38624 if (!register_operand (op0, Pmode))
38625 op0 = ix86_zero_extend_to_Pmode (op0);
38626 if (!register_operand (op1, Pmode))
38627 op1 = ix86_zero_extend_to_Pmode (op1);
38629 emit_insn (BNDmode == BND64mode
38630 ? gen_bnd64_ldx (target, op0, op1)
38631 : gen_bnd32_ldx (target, op0, op1));
38632 return target;
38634 case IX86_BUILTIN_BNDCL:
38635 arg0 = CALL_EXPR_ARG (exp, 0);
38636 arg1 = CALL_EXPR_ARG (exp, 1);
38638 op0 = expand_normal (arg0);
38639 op1 = expand_normal (arg1);
38641 if (!register_operand (op0, Pmode))
38642 op0 = ix86_zero_extend_to_Pmode (op0);
38643 if (!register_operand (op1, BNDmode))
38644 op1 = copy_to_mode_reg (BNDmode, op1);
38646 emit_insn (BNDmode == BND64mode
38647 ? gen_bnd64_cl (op1, op0)
38648 : gen_bnd32_cl (op1, op0));
38649 return 0;
38651 case IX86_BUILTIN_BNDCU:
38652 arg0 = CALL_EXPR_ARG (exp, 0);
38653 arg1 = CALL_EXPR_ARG (exp, 1);
38655 op0 = expand_normal (arg0);
38656 op1 = expand_normal (arg1);
38658 if (!register_operand (op0, Pmode))
38659 op0 = ix86_zero_extend_to_Pmode (op0);
38660 if (!register_operand (op1, BNDmode))
38661 op1 = copy_to_mode_reg (BNDmode, op1);
38663 emit_insn (BNDmode == BND64mode
38664 ? gen_bnd64_cu (op1, op0)
38665 : gen_bnd32_cu (op1, op0));
38666 return 0;
38668 case IX86_BUILTIN_BNDRET:
38669 arg0 = CALL_EXPR_ARG (exp, 0);
38670 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38671 target = chkp_get_rtl_bounds (arg0);
38673 /* If no bounds were specified for the returned value,
38674 then use INIT bounds. This usually happens when
38675 some built-in function is expanded. */
38676 if (!target)
38678 rtx t1 = gen_reg_rtx (Pmode);
38679 rtx t2 = gen_reg_rtx (Pmode);
38680 target = gen_reg_rtx (BNDmode);
38681 emit_move_insn (t1, const0_rtx);
38682 emit_move_insn (t2, constm1_rtx);
38683 emit_insn (BNDmode == BND64mode
38684 ? gen_bnd64_mk (target, t1, t2)
38685 : gen_bnd32_mk (target, t1, t2));
38688 gcc_assert (target && REG_P (target));
38689 return target;
38691 case IX86_BUILTIN_BNDNARROW:
38693 rtx m1, m1h1, m1h2, lb, ub, t1;
38695 /* Return value and lb. */
38696 arg0 = CALL_EXPR_ARG (exp, 0);
38697 /* Bounds. */
38698 arg1 = CALL_EXPR_ARG (exp, 1);
38699 /* Size. */
38700 arg2 = CALL_EXPR_ARG (exp, 2);
38702 lb = expand_normal (arg0);
38703 op1 = expand_normal (arg1);
38704 op2 = expand_normal (arg2);
38706 /* Size was passed, but we need to use (size - 1), as for bndmk. */
38707 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38708 NULL_RTX, 1, OPTAB_DIRECT);
38710 /* Add LB to size and invert to get UB. */
38711 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38712 op2, 1, OPTAB_DIRECT);
38713 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38715 if (!register_operand (lb, Pmode))
38716 lb = ix86_zero_extend_to_Pmode (lb);
38717 if (!register_operand (ub, Pmode))
38718 ub = ix86_zero_extend_to_Pmode (ub);
38720 /* We need to move bounds to memory before any computations. */
38721 if (MEM_P (op1))
38722 m1 = op1;
38723 else
38725 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38726 emit_move_insn (m1, op1);
38729 /* Generate mem expression to be used for access to LB and UB. */
38730 m1h1 = adjust_address (m1, Pmode, 0);
38731 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38733 t1 = gen_reg_rtx (Pmode);
38735 /* Compute LB. */
38736 emit_move_insn (t1, m1h1);
38737 ix86_emit_move_max (t1, lb);
38738 emit_move_insn (m1h1, t1);
38740 /* Compute UB. UB is stored in 1's complement form. Therefore
38741 we also use max here. */
38742 emit_move_insn (t1, m1h2);
38743 ix86_emit_move_max (t1, ub);
38744 emit_move_insn (m1h2, t1);
38746 op2 = gen_reg_rtx (BNDmode);
38747 emit_move_insn (op2, m1);
38749 return chkp_join_splitted_slot (lb, op2);
38752 case IX86_BUILTIN_BNDINT:
38754 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38756 if (!target
38757 || GET_MODE (target) != BNDmode
38758 || !register_operand (target, BNDmode))
38759 target = gen_reg_rtx (BNDmode);
38761 arg0 = CALL_EXPR_ARG (exp, 0);
38762 arg1 = CALL_EXPR_ARG (exp, 1);
38764 op0 = expand_normal (arg0);
38765 op1 = expand_normal (arg1);
38767 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38768 rh1 = adjust_address (res, Pmode, 0);
38769 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38771 /* Put the first bounds into temporaries. */
38772 lb1 = gen_reg_rtx (Pmode);
38773 ub1 = gen_reg_rtx (Pmode);
38774 if (MEM_P (op0))
38776 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38777 emit_move_insn (ub1, adjust_address (op0, Pmode,
38778 GET_MODE_SIZE (Pmode)));
38780 else
38782 emit_move_insn (res, op0);
38783 emit_move_insn (lb1, rh1);
38784 emit_move_insn (ub1, rh2);
38787 /* Put the second bounds into temporaries. */
38788 lb2 = gen_reg_rtx (Pmode);
38789 ub2 = gen_reg_rtx (Pmode);
38790 if (MEM_P (op1))
38792 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38793 emit_move_insn (ub2, adjust_address (op1, Pmode,
38794 GET_MODE_SIZE (Pmode)));
38796 else
38798 emit_move_insn (res, op1);
38799 emit_move_insn (lb2, rh1);
38800 emit_move_insn (ub2, rh2);
38803 /* Compute LB. */
38804 ix86_emit_move_max (lb1, lb2);
38805 emit_move_insn (rh1, lb1);
38807 /* Compute UB. UB is stored in 1's complement form. Therefore
38808 we also use max here. */
38809 ix86_emit_move_max (ub1, ub2);
38810 emit_move_insn (rh2, ub1);
38812 emit_move_insn (target, res);
38814 return target;
38817 case IX86_BUILTIN_SIZEOF:
38819 tree name;
38820 rtx symbol;
38822 if (!target
38823 || GET_MODE (target) != Pmode
38824 || !register_operand (target, Pmode))
38825 target = gen_reg_rtx (Pmode);
38827 arg0 = CALL_EXPR_ARG (exp, 0);
38828 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38830 name = DECL_ASSEMBLER_NAME (arg0);
38831 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38833 emit_insn (Pmode == SImode
38834 ? gen_move_size_reloc_si (target, symbol)
38835 : gen_move_size_reloc_di (target, symbol));
38837 return target;
38840 case IX86_BUILTIN_BNDLOWER:
38842 rtx mem, hmem;
38844 if (!target
38845 || GET_MODE (target) != Pmode
38846 || !register_operand (target, Pmode))
38847 target = gen_reg_rtx (Pmode);
38849 arg0 = CALL_EXPR_ARG (exp, 0);
38850 op0 = expand_normal (arg0);
38852 /* We need to move bounds to memory first. */
38853 if (MEM_P (op0))
38854 mem = op0;
38855 else
38857 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38858 emit_move_insn (mem, op0);
38861 /* Generate mem expression to access LB and load it. */
38862 hmem = adjust_address (mem, Pmode, 0);
38863 emit_move_insn (target, hmem);
38865 return target;
38868 case IX86_BUILTIN_BNDUPPER:
38870 rtx mem, hmem, res;
38872 if (!target
38873 || GET_MODE (target) != Pmode
38874 || !register_operand (target, Pmode))
38875 target = gen_reg_rtx (Pmode);
38877 arg0 = CALL_EXPR_ARG (exp, 0);
38878 op0 = expand_normal (arg0);
38880 /* We need to move bounds to memory first. */
38881 if (MEM_P (op0))
38882 mem = op0;
38883 else
38885 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38886 emit_move_insn (mem, op0);
38889 /* Generate mem expression to access UB. */
38890 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38892 /* We need to invert all bits of UB. */
38893 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38895 if (res != target)
38896 emit_move_insn (target, res);
38898 return target;
38901 case IX86_BUILTIN_MASKMOVQ:
38902 case IX86_BUILTIN_MASKMOVDQU:
38903 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38904 ? CODE_FOR_mmx_maskmovq
38905 : CODE_FOR_sse2_maskmovdqu);
38906 /* Note the arg order is different from the operand order. */
38907 arg1 = CALL_EXPR_ARG (exp, 0);
38908 arg2 = CALL_EXPR_ARG (exp, 1);
38909 arg0 = CALL_EXPR_ARG (exp, 2);
38910 op0 = expand_normal (arg0);
38911 op1 = expand_normal (arg1);
38912 op2 = expand_normal (arg2);
38913 mode0 = insn_data[icode].operand[0].mode;
38914 mode1 = insn_data[icode].operand[1].mode;
38915 mode2 = insn_data[icode].operand[2].mode;
38917 op0 = ix86_zero_extend_to_Pmode (op0);
38918 op0 = gen_rtx_MEM (mode1, op0);
38920 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38921 op0 = copy_to_mode_reg (mode0, op0);
38922 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38923 op1 = copy_to_mode_reg (mode1, op1);
38924 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38925 op2 = copy_to_mode_reg (mode2, op2);
38926 pat = GEN_FCN (icode) (op0, op1, op2);
38927 if (! pat)
38928 return 0;
38929 emit_insn (pat);
38930 return 0;
38932 case IX86_BUILTIN_LDMXCSR:
38933 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38934 target = assign_386_stack_local (SImode, SLOT_TEMP);
38935 emit_move_insn (target, op0);
38936 emit_insn (gen_sse_ldmxcsr (target));
38937 return 0;
38939 case IX86_BUILTIN_STMXCSR:
38940 target = assign_386_stack_local (SImode, SLOT_TEMP);
38941 emit_insn (gen_sse_stmxcsr (target));
38942 return copy_to_mode_reg (SImode, target);
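      /* Illustrative sketch, assuming the usual xmmintrin.h intrinsic
         mapping: this is the path taken by

             unsigned int csr = _mm_getcsr ();

         stmxcsr writes MXCSR into a 32-bit stack slot, which is then copied
         back into a register as the builtin's result.  */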
38944 case IX86_BUILTIN_CLFLUSH:
38945 arg0 = CALL_EXPR_ARG (exp, 0);
38946 op0 = expand_normal (arg0);
38947 icode = CODE_FOR_sse2_clflush;
38948 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38949 op0 = ix86_zero_extend_to_Pmode (op0);
38951 emit_insn (gen_sse2_clflush (op0));
38952 return 0;
38954 case IX86_BUILTIN_CLWB:
38955 arg0 = CALL_EXPR_ARG (exp, 0);
38956 op0 = expand_normal (arg0);
38957 icode = CODE_FOR_clwb;
38958 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38959 op0 = ix86_zero_extend_to_Pmode (op0);
38961 emit_insn (gen_clwb (op0));
38962 return 0;
38964 case IX86_BUILTIN_CLFLUSHOPT:
38965 arg0 = CALL_EXPR_ARG (exp, 0);
38966 op0 = expand_normal (arg0);
38967 icode = CODE_FOR_clflushopt;
38968 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38969 op0 = ix86_zero_extend_to_Pmode (op0);
38971 emit_insn (gen_clflushopt (op0));
38972 return 0;
38974 case IX86_BUILTIN_MONITOR:
38975 arg0 = CALL_EXPR_ARG (exp, 0);
38976 arg1 = CALL_EXPR_ARG (exp, 1);
38977 arg2 = CALL_EXPR_ARG (exp, 2);
38978 op0 = expand_normal (arg0);
38979 op1 = expand_normal (arg1);
38980 op2 = expand_normal (arg2);
38981 if (!REG_P (op0))
38982 op0 = ix86_zero_extend_to_Pmode (op0);
38983 if (!REG_P (op1))
38984 op1 = copy_to_mode_reg (SImode, op1);
38985 if (!REG_P (op2))
38986 op2 = copy_to_mode_reg (SImode, op2);
38987 emit_insn (ix86_gen_monitor (op0, op1, op2));
38988 return 0;
38990 case IX86_BUILTIN_MWAIT:
38991 arg0 = CALL_EXPR_ARG (exp, 0);
38992 arg1 = CALL_EXPR_ARG (exp, 1);
38993 op0 = expand_normal (arg0);
38994 op1 = expand_normal (arg1);
38995 if (!REG_P (op0))
38996 op0 = copy_to_mode_reg (SImode, op0);
38997 if (!REG_P (op1))
38998 op1 = copy_to_mode_reg (SImode, op1);
38999 emit_insn (gen_sse3_mwait (op0, op1));
39000 return 0;
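      /* Illustrative sketch, assuming the usual pmmintrin.h intrinsic
         mapping (_mm_monitor and _mm_mwait):

             _mm_monitor (addr, 0, 0);
             _mm_mwait (0, 0);

         The address operand is zero-extended to Pmode; the extension and
         hint operands are plain SImode registers.  */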
39002 case IX86_BUILTIN_VEC_INIT_V2SI:
39003 case IX86_BUILTIN_VEC_INIT_V4HI:
39004 case IX86_BUILTIN_VEC_INIT_V8QI:
39005 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39007 case IX86_BUILTIN_VEC_EXT_V2DF:
39008 case IX86_BUILTIN_VEC_EXT_V2DI:
39009 case IX86_BUILTIN_VEC_EXT_V4SF:
39010 case IX86_BUILTIN_VEC_EXT_V4SI:
39011 case IX86_BUILTIN_VEC_EXT_V8HI:
39012 case IX86_BUILTIN_VEC_EXT_V2SI:
39013 case IX86_BUILTIN_VEC_EXT_V4HI:
39014 case IX86_BUILTIN_VEC_EXT_V16QI:
39015 return ix86_expand_vec_ext_builtin (exp, target);
39017 case IX86_BUILTIN_VEC_SET_V2DI:
39018 case IX86_BUILTIN_VEC_SET_V4SF:
39019 case IX86_BUILTIN_VEC_SET_V4SI:
39020 case IX86_BUILTIN_VEC_SET_V8HI:
39021 case IX86_BUILTIN_VEC_SET_V4HI:
39022 case IX86_BUILTIN_VEC_SET_V16QI:
39023 return ix86_expand_vec_set_builtin (exp);
39025 case IX86_BUILTIN_INFQ:
39026 case IX86_BUILTIN_HUGE_VALQ:
39028 REAL_VALUE_TYPE inf;
39029 rtx tmp;
39031 real_inf (&inf);
39032 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39034 tmp = validize_mem (force_const_mem (mode, tmp));
39036 if (target == 0)
39037 target = gen_reg_rtx (mode);
39039 emit_move_insn (target, tmp);
39040 return target;
39043 case IX86_BUILTIN_RDPMC:
39044 case IX86_BUILTIN_RDTSC:
39045 case IX86_BUILTIN_RDTSCP:
39047 op0 = gen_reg_rtx (DImode);
39048 op1 = gen_reg_rtx (DImode);
39050 if (fcode == IX86_BUILTIN_RDPMC)
39052 arg0 = CALL_EXPR_ARG (exp, 0);
39053 op2 = expand_normal (arg0);
39054 if (!register_operand (op2, SImode))
39055 op2 = copy_to_mode_reg (SImode, op2);
39057 insn = (TARGET_64BIT
39058 ? gen_rdpmc_rex64 (op0, op1, op2)
39059 : gen_rdpmc (op0, op2));
39060 emit_insn (insn);
39062 else if (fcode == IX86_BUILTIN_RDTSC)
39064 insn = (TARGET_64BIT
39065 ? gen_rdtsc_rex64 (op0, op1)
39066 : gen_rdtsc (op0));
39067 emit_insn (insn);
39069 else
39071 op2 = gen_reg_rtx (SImode);
39073 insn = (TARGET_64BIT
39074 ? gen_rdtscp_rex64 (op0, op1, op2)
39075 : gen_rdtscp (op0, op2));
39076 emit_insn (insn);
39078 arg0 = CALL_EXPR_ARG (exp, 0);
39079 op4 = expand_normal (arg0);
39080 if (!address_operand (op4, VOIDmode))
39082 op4 = convert_memory_address (Pmode, op4);
39083 op4 = copy_addr_to_reg (op4);
39085 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39088 if (target == 0)
39090 /* mode is VOIDmode if __builtin_rd* has been called
39091 without an lhs. */
39092 if (mode == VOIDmode)
39093 return target;
39094 target = gen_reg_rtx (mode);
39097 if (TARGET_64BIT)
39099 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39100 op1, 1, OPTAB_DIRECT);
39101 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39102 op0, 1, OPTAB_DIRECT);
39105 emit_move_insn (target, op0);
39106 return target;
39108 case IX86_BUILTIN_FXSAVE:
39109 case IX86_BUILTIN_FXRSTOR:
39110 case IX86_BUILTIN_FXSAVE64:
39111 case IX86_BUILTIN_FXRSTOR64:
39112 case IX86_BUILTIN_FNSTENV:
39113 case IX86_BUILTIN_FLDENV:
39114 mode0 = BLKmode;
39115 switch (fcode)
39117 case IX86_BUILTIN_FXSAVE:
39118 icode = CODE_FOR_fxsave;
39119 break;
39120 case IX86_BUILTIN_FXRSTOR:
39121 icode = CODE_FOR_fxrstor;
39122 break;
39123 case IX86_BUILTIN_FXSAVE64:
39124 icode = CODE_FOR_fxsave64;
39125 break;
39126 case IX86_BUILTIN_FXRSTOR64:
39127 icode = CODE_FOR_fxrstor64;
39128 break;
39129 case IX86_BUILTIN_FNSTENV:
39130 icode = CODE_FOR_fnstenv;
39131 break;
39132 case IX86_BUILTIN_FLDENV:
39133 icode = CODE_FOR_fldenv;
39134 break;
39135 default:
39136 gcc_unreachable ();
39139 arg0 = CALL_EXPR_ARG (exp, 0);
39140 op0 = expand_normal (arg0);
39142 if (!address_operand (op0, VOIDmode))
39144 op0 = convert_memory_address (Pmode, op0);
39145 op0 = copy_addr_to_reg (op0);
39147 op0 = gen_rtx_MEM (mode0, op0);
39149 pat = GEN_FCN (icode) (op0);
39150 if (pat)
39151 emit_insn (pat);
39152 return 0;
39154 case IX86_BUILTIN_XSAVE:
39155 case IX86_BUILTIN_XRSTOR:
39156 case IX86_BUILTIN_XSAVE64:
39157 case IX86_BUILTIN_XRSTOR64:
39158 case IX86_BUILTIN_XSAVEOPT:
39159 case IX86_BUILTIN_XSAVEOPT64:
39160 case IX86_BUILTIN_XSAVES:
39161 case IX86_BUILTIN_XRSTORS:
39162 case IX86_BUILTIN_XSAVES64:
39163 case IX86_BUILTIN_XRSTORS64:
39164 case IX86_BUILTIN_XSAVEC:
39165 case IX86_BUILTIN_XSAVEC64:
39166 arg0 = CALL_EXPR_ARG (exp, 0);
39167 arg1 = CALL_EXPR_ARG (exp, 1);
39168 op0 = expand_normal (arg0);
39169 op1 = expand_normal (arg1);
39171 if (!address_operand (op0, VOIDmode))
39173 op0 = convert_memory_address (Pmode, op0);
39174 op0 = copy_addr_to_reg (op0);
39176 op0 = gen_rtx_MEM (BLKmode, op0);
39178 op1 = force_reg (DImode, op1);
39180 if (TARGET_64BIT)
39182 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39183 NULL, 1, OPTAB_DIRECT);
39184 switch (fcode)
39186 case IX86_BUILTIN_XSAVE:
39187 icode = CODE_FOR_xsave_rex64;
39188 break;
39189 case IX86_BUILTIN_XRSTOR:
39190 icode = CODE_FOR_xrstor_rex64;
39191 break;
39192 case IX86_BUILTIN_XSAVE64:
39193 icode = CODE_FOR_xsave64;
39194 break;
39195 case IX86_BUILTIN_XRSTOR64:
39196 icode = CODE_FOR_xrstor64;
39197 break;
39198 case IX86_BUILTIN_XSAVEOPT:
39199 icode = CODE_FOR_xsaveopt_rex64;
39200 break;
39201 case IX86_BUILTIN_XSAVEOPT64:
39202 icode = CODE_FOR_xsaveopt64;
39203 break;
39204 case IX86_BUILTIN_XSAVES:
39205 icode = CODE_FOR_xsaves_rex64;
39206 break;
39207 case IX86_BUILTIN_XRSTORS:
39208 icode = CODE_FOR_xrstors_rex64;
39209 break;
39210 case IX86_BUILTIN_XSAVES64:
39211 icode = CODE_FOR_xsaves64;
39212 break;
39213 case IX86_BUILTIN_XRSTORS64:
39214 icode = CODE_FOR_xrstors64;
39215 break;
39216 case IX86_BUILTIN_XSAVEC:
39217 icode = CODE_FOR_xsavec_rex64;
39218 break;
39219 case IX86_BUILTIN_XSAVEC64:
39220 icode = CODE_FOR_xsavec64;
39221 break;
39222 default:
39223 gcc_unreachable ();
39226 op2 = gen_lowpart (SImode, op2);
39227 op1 = gen_lowpart (SImode, op1);
39228 pat = GEN_FCN (icode) (op0, op1, op2);
39230 else
39232 switch (fcode)
39234 case IX86_BUILTIN_XSAVE:
39235 icode = CODE_FOR_xsave;
39236 break;
39237 case IX86_BUILTIN_XRSTOR:
39238 icode = CODE_FOR_xrstor;
39239 break;
39240 case IX86_BUILTIN_XSAVEOPT:
39241 icode = CODE_FOR_xsaveopt;
39242 break;
39243 case IX86_BUILTIN_XSAVES:
39244 icode = CODE_FOR_xsaves;
39245 break;
39246 case IX86_BUILTIN_XRSTORS:
39247 icode = CODE_FOR_xrstors;
39248 break;
39249 case IX86_BUILTIN_XSAVEC:
39250 icode = CODE_FOR_xsavec;
39251 break;
39252 default:
39253 gcc_unreachable ();
39255 pat = GEN_FCN (icode) (op0, op1);
39258 if (pat)
39259 emit_insn (pat);
39260 return 0;
39262 case IX86_BUILTIN_LLWPCB:
39263 arg0 = CALL_EXPR_ARG (exp, 0);
39264 op0 = expand_normal (arg0);
39265 icode = CODE_FOR_lwp_llwpcb;
39266 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39267 op0 = ix86_zero_extend_to_Pmode (op0);
39268 emit_insn (gen_lwp_llwpcb (op0));
39269 return 0;
39271 case IX86_BUILTIN_SLWPCB:
39272 icode = CODE_FOR_lwp_slwpcb;
39273 if (!target
39274 || !insn_data[icode].operand[0].predicate (target, Pmode))
39275 target = gen_reg_rtx (Pmode);
39276 emit_insn (gen_lwp_slwpcb (target));
39277 return target;
39279 case IX86_BUILTIN_BEXTRI32:
39280 case IX86_BUILTIN_BEXTRI64:
39281 arg0 = CALL_EXPR_ARG (exp, 0);
39282 arg1 = CALL_EXPR_ARG (exp, 1);
39283 op0 = expand_normal (arg0);
39284 op1 = expand_normal (arg1);
39285 icode = (fcode == IX86_BUILTIN_BEXTRI32
39286 ? CODE_FOR_tbm_bextri_si
39287 : CODE_FOR_tbm_bextri_di);
39288 if (!CONST_INT_P (op1))
39290 error ("last argument must be an immediate");
39291 return const0_rtx;
39293 else
39295 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39296 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39297 op1 = GEN_INT (length);
39298 op2 = GEN_INT (lsb_index);
39299 pat = GEN_FCN (icode) (target, op0, op1, op2);
39300 if (pat)
39301 emit_insn (pat);
39302 return target;
39305 case IX86_BUILTIN_RDRAND16_STEP:
39306 icode = CODE_FOR_rdrandhi_1;
39307 mode0 = HImode;
39308 goto rdrand_step;
39310 case IX86_BUILTIN_RDRAND32_STEP:
39311 icode = CODE_FOR_rdrandsi_1;
39312 mode0 = SImode;
39313 goto rdrand_step;
39315 case IX86_BUILTIN_RDRAND64_STEP:
39316 icode = CODE_FOR_rdranddi_1;
39317 mode0 = DImode;
39319 rdrand_step:
39320 op0 = gen_reg_rtx (mode0);
39321 emit_insn (GEN_FCN (icode) (op0));
39323 arg0 = CALL_EXPR_ARG (exp, 0);
39324 op1 = expand_normal (arg0);
39325 if (!address_operand (op1, VOIDmode))
39327 op1 = convert_memory_address (Pmode, op1);
39328 op1 = copy_addr_to_reg (op1);
39330 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39332 op1 = gen_reg_rtx (SImode);
39333 emit_move_insn (op1, CONST1_RTX (SImode));
39335 /* Emit SImode conditional move. */
39336 if (mode0 == HImode)
39338 op2 = gen_reg_rtx (SImode);
39339 emit_insn (gen_zero_extendhisi2 (op2, op0));
39341 else if (mode0 == SImode)
39342 op2 = op0;
39343 else
39344 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39346 if (target == 0
39347 || !register_operand (target, SImode))
39348 target = gen_reg_rtx (SImode);
39350 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39351 const0_rtx);
39352 emit_insn (gen_rtx_SET (VOIDmode, target,
39353 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39354 return target;
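      /* Illustrative sketch, assuming the usual immintrin.h intrinsic
         mapping: the three *_STEP cases above implement e.g. _rdrand32_step,

             unsigned int r;
             int ok = _rdrand32_step (&r);

         The random value is stored through the pointer argument; the result
         is 1 when the hardware set CF (success) and 0 otherwise, because the
         conditional move above falls back to the output register, which the
         ISA zeroes on failure.  */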
39356 case IX86_BUILTIN_RDSEED16_STEP:
39357 icode = CODE_FOR_rdseedhi_1;
39358 mode0 = HImode;
39359 goto rdseed_step;
39361 case IX86_BUILTIN_RDSEED32_STEP:
39362 icode = CODE_FOR_rdseedsi_1;
39363 mode0 = SImode;
39364 goto rdseed_step;
39366 case IX86_BUILTIN_RDSEED64_STEP:
39367 icode = CODE_FOR_rdseeddi_1;
39368 mode0 = DImode;
39370 rdseed_step:
39371 op0 = gen_reg_rtx (mode0);
39372 emit_insn (GEN_FCN (icode) (op0));
39374 arg0 = CALL_EXPR_ARG (exp, 0);
39375 op1 = expand_normal (arg0);
39376 if (!address_operand (op1, VOIDmode))
39378 op1 = convert_memory_address (Pmode, op1);
39379 op1 = copy_addr_to_reg (op1);
39381 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39383 op2 = gen_reg_rtx (QImode);
39385 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39386 const0_rtx);
39387 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39389 if (target == 0
39390 || !register_operand (target, SImode))
39391 target = gen_reg_rtx (SImode);
39393 emit_insn (gen_zero_extendqisi2 (target, op2));
39394 return target;
39396 case IX86_BUILTIN_SBB32:
39397 icode = CODE_FOR_subsi3_carry;
39398 mode0 = SImode;
39399 goto addcarryx;
39401 case IX86_BUILTIN_SBB64:
39402 icode = CODE_FOR_subdi3_carry;
39403 mode0 = DImode;
39404 goto addcarryx;
39406 case IX86_BUILTIN_ADDCARRYX32:
39407 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39408 mode0 = SImode;
39409 goto addcarryx;
39411 case IX86_BUILTIN_ADDCARRYX64:
39412 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39413 mode0 = DImode;
39415 addcarryx:
39416 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39417 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39418 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39419 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39421 op0 = gen_reg_rtx (QImode);
39423 /* Generate CF from input operand. */
39424 op1 = expand_normal (arg0);
39425 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39426 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39428 /* Generate an ADCX instruction to compute X+Y+CF. */
39429 op2 = expand_normal (arg1);
39430 op3 = expand_normal (arg2);
39432 if (!REG_P (op2))
39433 op2 = copy_to_mode_reg (mode0, op2);
39434 if (!REG_P (op3))
39435 op3 = copy_to_mode_reg (mode0, op3);
39437 op0 = gen_reg_rtx (mode0);
39439 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39440 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39441 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39443 /* Store the result. */
39444 op4 = expand_normal (arg3);
39445 if (!address_operand (op4, VOIDmode))
39447 op4 = convert_memory_address (Pmode, op4);
39448 op4 = copy_addr_to_reg (op4);
39450 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39452 /* Return current CF value. */
39453 if (target == 0)
39454 target = gen_reg_rtx (QImode);
39456 PUT_MODE (pat, QImode);
39457 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39458 return target;
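      /* Illustrative sketch, assuming the usual adxintrin.h intrinsic
         mapping (IX86_BUILTIN_ADDCARRYX32 backing _addcarryx_u32):

             unsigned int sum;
             unsigned char c_out = _addcarryx_u32 (c_in, a, b, &sum);

         The code above first materializes CF from c_in by adding -1 to it,
         then emits the carry-using add, stores the low result through the
         pointer and returns the resulting carry flag.  */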
39460 case IX86_BUILTIN_READ_FLAGS:
39461 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39463 if (optimize
39464 || target == NULL_RTX
39465 || !nonimmediate_operand (target, word_mode)
39466 || GET_MODE (target) != word_mode)
39467 target = gen_reg_rtx (word_mode);
39469 emit_insn (gen_pop (target));
39470 return target;
39472 case IX86_BUILTIN_WRITE_FLAGS:
39474 arg0 = CALL_EXPR_ARG (exp, 0);
39475 op0 = expand_normal (arg0);
39476 if (!general_no_elim_operand (op0, word_mode))
39477 op0 = copy_to_mode_reg (word_mode, op0);
39479 emit_insn (gen_push (op0));
39480 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39481 return 0;
39483 case IX86_BUILTIN_KORTESTC16:
39484 icode = CODE_FOR_kortestchi;
39485 mode0 = HImode;
39486 mode1 = CCCmode;
39487 goto kortest;
39489 case IX86_BUILTIN_KORTESTZ16:
39490 icode = CODE_FOR_kortestzhi;
39491 mode0 = HImode;
39492 mode1 = CCZmode;
39494 kortest:
39495 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39496 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39497 op0 = expand_normal (arg0);
39498 op1 = expand_normal (arg1);
39500 op0 = copy_to_reg (op0);
39501 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39502 op1 = copy_to_reg (op1);
39503 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39505 target = gen_reg_rtx (QImode);
39506 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39508 /* Emit kortest. */
39509 emit_insn (GEN_FCN (icode) (op0, op1));
39510 /* And use setcc to return result from flags. */
39511 ix86_expand_setcc (target, EQ,
39512 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39513 return target;
39515 case IX86_BUILTIN_GATHERSIV2DF:
39516 icode = CODE_FOR_avx2_gathersiv2df;
39517 goto gather_gen;
39518 case IX86_BUILTIN_GATHERSIV4DF:
39519 icode = CODE_FOR_avx2_gathersiv4df;
39520 goto gather_gen;
39521 case IX86_BUILTIN_GATHERDIV2DF:
39522 icode = CODE_FOR_avx2_gatherdiv2df;
39523 goto gather_gen;
39524 case IX86_BUILTIN_GATHERDIV4DF:
39525 icode = CODE_FOR_avx2_gatherdiv4df;
39526 goto gather_gen;
39527 case IX86_BUILTIN_GATHERSIV4SF:
39528 icode = CODE_FOR_avx2_gathersiv4sf;
39529 goto gather_gen;
39530 case IX86_BUILTIN_GATHERSIV8SF:
39531 icode = CODE_FOR_avx2_gathersiv8sf;
39532 goto gather_gen;
39533 case IX86_BUILTIN_GATHERDIV4SF:
39534 icode = CODE_FOR_avx2_gatherdiv4sf;
39535 goto gather_gen;
39536 case IX86_BUILTIN_GATHERDIV8SF:
39537 icode = CODE_FOR_avx2_gatherdiv8sf;
39538 goto gather_gen;
39539 case IX86_BUILTIN_GATHERSIV2DI:
39540 icode = CODE_FOR_avx2_gathersiv2di;
39541 goto gather_gen;
39542 case IX86_BUILTIN_GATHERSIV4DI:
39543 icode = CODE_FOR_avx2_gathersiv4di;
39544 goto gather_gen;
39545 case IX86_BUILTIN_GATHERDIV2DI:
39546 icode = CODE_FOR_avx2_gatherdiv2di;
39547 goto gather_gen;
39548 case IX86_BUILTIN_GATHERDIV4DI:
39549 icode = CODE_FOR_avx2_gatherdiv4di;
39550 goto gather_gen;
39551 case IX86_BUILTIN_GATHERSIV4SI:
39552 icode = CODE_FOR_avx2_gathersiv4si;
39553 goto gather_gen;
39554 case IX86_BUILTIN_GATHERSIV8SI:
39555 icode = CODE_FOR_avx2_gathersiv8si;
39556 goto gather_gen;
39557 case IX86_BUILTIN_GATHERDIV4SI:
39558 icode = CODE_FOR_avx2_gatherdiv4si;
39559 goto gather_gen;
39560 case IX86_BUILTIN_GATHERDIV8SI:
39561 icode = CODE_FOR_avx2_gatherdiv8si;
39562 goto gather_gen;
39563 case IX86_BUILTIN_GATHERALTSIV4DF:
39564 icode = CODE_FOR_avx2_gathersiv4df;
39565 goto gather_gen;
39566 case IX86_BUILTIN_GATHERALTDIV8SF:
39567 icode = CODE_FOR_avx2_gatherdiv8sf;
39568 goto gather_gen;
39569 case IX86_BUILTIN_GATHERALTSIV4DI:
39570 icode = CODE_FOR_avx2_gathersiv4di;
39571 goto gather_gen;
39572 case IX86_BUILTIN_GATHERALTDIV8SI:
39573 icode = CODE_FOR_avx2_gatherdiv8si;
39574 goto gather_gen;
39575 case IX86_BUILTIN_GATHER3SIV16SF:
39576 icode = CODE_FOR_avx512f_gathersiv16sf;
39577 goto gather_gen;
39578 case IX86_BUILTIN_GATHER3SIV8DF:
39579 icode = CODE_FOR_avx512f_gathersiv8df;
39580 goto gather_gen;
39581 case IX86_BUILTIN_GATHER3DIV16SF:
39582 icode = CODE_FOR_avx512f_gatherdiv16sf;
39583 goto gather_gen;
39584 case IX86_BUILTIN_GATHER3DIV8DF:
39585 icode = CODE_FOR_avx512f_gatherdiv8df;
39586 goto gather_gen;
39587 case IX86_BUILTIN_GATHER3SIV16SI:
39588 icode = CODE_FOR_avx512f_gathersiv16si;
39589 goto gather_gen;
39590 case IX86_BUILTIN_GATHER3SIV8DI:
39591 icode = CODE_FOR_avx512f_gathersiv8di;
39592 goto gather_gen;
39593 case IX86_BUILTIN_GATHER3DIV16SI:
39594 icode = CODE_FOR_avx512f_gatherdiv16si;
39595 goto gather_gen;
39596 case IX86_BUILTIN_GATHER3DIV8DI:
39597 icode = CODE_FOR_avx512f_gatherdiv8di;
39598 goto gather_gen;
39599 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39600 icode = CODE_FOR_avx512f_gathersiv8df;
39601 goto gather_gen;
39602 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39603 icode = CODE_FOR_avx512f_gatherdiv16sf;
39604 goto gather_gen;
39605 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39606 icode = CODE_FOR_avx512f_gathersiv8di;
39607 goto gather_gen;
39608 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39609 icode = CODE_FOR_avx512f_gatherdiv16si;
39610 goto gather_gen;
39611 case IX86_BUILTIN_GATHER3SIV2DF:
39612 icode = CODE_FOR_avx512vl_gathersiv2df;
39613 goto gather_gen;
39614 case IX86_BUILTIN_GATHER3SIV4DF:
39615 icode = CODE_FOR_avx512vl_gathersiv4df;
39616 goto gather_gen;
39617 case IX86_BUILTIN_GATHER3DIV2DF:
39618 icode = CODE_FOR_avx512vl_gatherdiv2df;
39619 goto gather_gen;
39620 case IX86_BUILTIN_GATHER3DIV4DF:
39621 icode = CODE_FOR_avx512vl_gatherdiv4df;
39622 goto gather_gen;
39623 case IX86_BUILTIN_GATHER3SIV4SF:
39624 icode = CODE_FOR_avx512vl_gathersiv4sf;
39625 goto gather_gen;
39626 case IX86_BUILTIN_GATHER3SIV8SF:
39627 icode = CODE_FOR_avx512vl_gathersiv8sf;
39628 goto gather_gen;
39629 case IX86_BUILTIN_GATHER3DIV4SF:
39630 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39631 goto gather_gen;
39632 case IX86_BUILTIN_GATHER3DIV8SF:
39633 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39634 goto gather_gen;
39635 case IX86_BUILTIN_GATHER3SIV2DI:
39636 icode = CODE_FOR_avx512vl_gathersiv2di;
39637 goto gather_gen;
39638 case IX86_BUILTIN_GATHER3SIV4DI:
39639 icode = CODE_FOR_avx512vl_gathersiv4di;
39640 goto gather_gen;
39641 case IX86_BUILTIN_GATHER3DIV2DI:
39642 icode = CODE_FOR_avx512vl_gatherdiv2di;
39643 goto gather_gen;
39644 case IX86_BUILTIN_GATHER3DIV4DI:
39645 icode = CODE_FOR_avx512vl_gatherdiv4di;
39646 goto gather_gen;
39647 case IX86_BUILTIN_GATHER3SIV4SI:
39648 icode = CODE_FOR_avx512vl_gathersiv4si;
39649 goto gather_gen;
39650 case IX86_BUILTIN_GATHER3SIV8SI:
39651 icode = CODE_FOR_avx512vl_gathersiv8si;
39652 goto gather_gen;
39653 case IX86_BUILTIN_GATHER3DIV4SI:
39654 icode = CODE_FOR_avx512vl_gatherdiv4si;
39655 goto gather_gen;
39656 case IX86_BUILTIN_GATHER3DIV8SI:
39657 icode = CODE_FOR_avx512vl_gatherdiv8si;
39658 goto gather_gen;
39659 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39660 icode = CODE_FOR_avx512vl_gathersiv4df;
39661 goto gather_gen;
39662 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39663 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39664 goto gather_gen;
39665 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39666 icode = CODE_FOR_avx512vl_gathersiv4di;
39667 goto gather_gen;
39668 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39669 icode = CODE_FOR_avx512vl_gatherdiv8si;
39670 goto gather_gen;
39671 case IX86_BUILTIN_SCATTERSIV16SF:
39672 icode = CODE_FOR_avx512f_scattersiv16sf;
39673 goto scatter_gen;
39674 case IX86_BUILTIN_SCATTERSIV8DF:
39675 icode = CODE_FOR_avx512f_scattersiv8df;
39676 goto scatter_gen;
39677 case IX86_BUILTIN_SCATTERDIV16SF:
39678 icode = CODE_FOR_avx512f_scatterdiv16sf;
39679 goto scatter_gen;
39680 case IX86_BUILTIN_SCATTERDIV8DF:
39681 icode = CODE_FOR_avx512f_scatterdiv8df;
39682 goto scatter_gen;
39683 case IX86_BUILTIN_SCATTERSIV16SI:
39684 icode = CODE_FOR_avx512f_scattersiv16si;
39685 goto scatter_gen;
39686 case IX86_BUILTIN_SCATTERSIV8DI:
39687 icode = CODE_FOR_avx512f_scattersiv8di;
39688 goto scatter_gen;
39689 case IX86_BUILTIN_SCATTERDIV16SI:
39690 icode = CODE_FOR_avx512f_scatterdiv16si;
39691 goto scatter_gen;
39692 case IX86_BUILTIN_SCATTERDIV8DI:
39693 icode = CODE_FOR_avx512f_scatterdiv8di;
39694 goto scatter_gen;
39695 case IX86_BUILTIN_SCATTERSIV8SF:
39696 icode = CODE_FOR_avx512vl_scattersiv8sf;
39697 goto scatter_gen;
39698 case IX86_BUILTIN_SCATTERSIV4SF:
39699 icode = CODE_FOR_avx512vl_scattersiv4sf;
39700 goto scatter_gen;
39701 case IX86_BUILTIN_SCATTERSIV4DF:
39702 icode = CODE_FOR_avx512vl_scattersiv4df;
39703 goto scatter_gen;
39704 case IX86_BUILTIN_SCATTERSIV2DF:
39705 icode = CODE_FOR_avx512vl_scattersiv2df;
39706 goto scatter_gen;
39707 case IX86_BUILTIN_SCATTERDIV8SF:
39708 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39709 goto scatter_gen;
39710 case IX86_BUILTIN_SCATTERDIV4SF:
39711 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39712 goto scatter_gen;
39713 case IX86_BUILTIN_SCATTERDIV4DF:
39714 icode = CODE_FOR_avx512vl_scatterdiv4df;
39715 goto scatter_gen;
39716 case IX86_BUILTIN_SCATTERDIV2DF:
39717 icode = CODE_FOR_avx512vl_scatterdiv2df;
39718 goto scatter_gen;
39719 case IX86_BUILTIN_SCATTERSIV8SI:
39720 icode = CODE_FOR_avx512vl_scattersiv8si;
39721 goto scatter_gen;
39722 case IX86_BUILTIN_SCATTERSIV4SI:
39723 icode = CODE_FOR_avx512vl_scattersiv4si;
39724 goto scatter_gen;
39725 case IX86_BUILTIN_SCATTERSIV4DI:
39726 icode = CODE_FOR_avx512vl_scattersiv4di;
39727 goto scatter_gen;
39728 case IX86_BUILTIN_SCATTERSIV2DI:
39729 icode = CODE_FOR_avx512vl_scattersiv2di;
39730 goto scatter_gen;
39731 case IX86_BUILTIN_SCATTERDIV8SI:
39732 icode = CODE_FOR_avx512vl_scatterdiv8si;
39733 goto scatter_gen;
39734 case IX86_BUILTIN_SCATTERDIV4SI:
39735 icode = CODE_FOR_avx512vl_scatterdiv4si;
39736 goto scatter_gen;
39737 case IX86_BUILTIN_SCATTERDIV4DI:
39738 icode = CODE_FOR_avx512vl_scatterdiv4di;
39739 goto scatter_gen;
39740 case IX86_BUILTIN_SCATTERDIV2DI:
39741 icode = CODE_FOR_avx512vl_scatterdiv2di;
39742 goto scatter_gen;
39743 case IX86_BUILTIN_GATHERPFDPD:
39744 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39745 goto vec_prefetch_gen;
39746 case IX86_BUILTIN_GATHERPFDPS:
39747 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39748 goto vec_prefetch_gen;
39749 case IX86_BUILTIN_GATHERPFQPD:
39750 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39751 goto vec_prefetch_gen;
39752 case IX86_BUILTIN_GATHERPFQPS:
39753 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39754 goto vec_prefetch_gen;
39755 case IX86_BUILTIN_SCATTERPFDPD:
39756 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39757 goto vec_prefetch_gen;
39758 case IX86_BUILTIN_SCATTERPFDPS:
39759 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39760 goto vec_prefetch_gen;
39761 case IX86_BUILTIN_SCATTERPFQPD:
39762 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39763 goto vec_prefetch_gen;
39764 case IX86_BUILTIN_SCATTERPFQPS:
39765 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39766 goto vec_prefetch_gen;
39768 gather_gen:
39769 rtx half;
39770 rtx (*gen) (rtx, rtx);
39772 arg0 = CALL_EXPR_ARG (exp, 0);
39773 arg1 = CALL_EXPR_ARG (exp, 1);
39774 arg2 = CALL_EXPR_ARG (exp, 2);
39775 arg3 = CALL_EXPR_ARG (exp, 3);
39776 arg4 = CALL_EXPR_ARG (exp, 4);
39777 op0 = expand_normal (arg0);
39778 op1 = expand_normal (arg1);
39779 op2 = expand_normal (arg2);
39780 op3 = expand_normal (arg3);
39781 op4 = expand_normal (arg4);
39782 /* Note the arg order is different from the operand order. */
39783 mode0 = insn_data[icode].operand[1].mode;
39784 mode2 = insn_data[icode].operand[3].mode;
39785 mode3 = insn_data[icode].operand[4].mode;
39786 mode4 = insn_data[icode].operand[5].mode;
39788 if (target == NULL_RTX
39789 || GET_MODE (target) != insn_data[icode].operand[0].mode
39790 || !insn_data[icode].operand[0].predicate (target,
39791 GET_MODE (target)))
39792 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39793 else
39794 subtarget = target;
39796 switch (fcode)
39798 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39799 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39800 half = gen_reg_rtx (V8SImode);
39801 if (!nonimmediate_operand (op2, V16SImode))
39802 op2 = copy_to_mode_reg (V16SImode, op2);
39803 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39804 op2 = half;
39805 break;
39806 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39807 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39808 case IX86_BUILTIN_GATHERALTSIV4DF:
39809 case IX86_BUILTIN_GATHERALTSIV4DI:
39810 half = gen_reg_rtx (V4SImode);
39811 if (!nonimmediate_operand (op2, V8SImode))
39812 op2 = copy_to_mode_reg (V8SImode, op2);
39813 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39814 op2 = half;
39815 break;
39816 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39817 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39818 half = gen_reg_rtx (mode0);
39819 if (mode0 == V8SFmode)
39820 gen = gen_vec_extract_lo_v16sf;
39821 else
39822 gen = gen_vec_extract_lo_v16si;
39823 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39824 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39825 emit_insn (gen (half, op0));
39826 op0 = half;
39827 if (GET_MODE (op3) != VOIDmode)
39829 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39830 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39831 emit_insn (gen (half, op3));
39832 op3 = half;
39834 break;
39835 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39836 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39837 case IX86_BUILTIN_GATHERALTDIV8SF:
39838 case IX86_BUILTIN_GATHERALTDIV8SI:
39839 half = gen_reg_rtx (mode0);
39840 if (mode0 == V4SFmode)
39841 gen = gen_vec_extract_lo_v8sf;
39842 else
39843 gen = gen_vec_extract_lo_v8si;
39844 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39845 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39846 emit_insn (gen (half, op0));
39847 op0 = half;
39848 if (GET_MODE (op3) != VOIDmode)
39850 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39851 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39852 emit_insn (gen (half, op3));
39853 op3 = half;
39855 break;
39856 default:
39857 break;
39860 /* Force the memory operand to use only a base register here; we
39861 don't want to do this for the memory operands of other builtin
39862 functions. */
39863 op1 = ix86_zero_extend_to_Pmode (op1);
39865 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39866 op0 = copy_to_mode_reg (mode0, op0);
39867 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39868 op1 = copy_to_mode_reg (Pmode, op1);
39869 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39870 op2 = copy_to_mode_reg (mode2, op2);
39872 op3 = fixup_modeless_constant (op3, mode3);
39874 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39876 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39877 op3 = copy_to_mode_reg (mode3, op3);
39879 else
39881 op3 = copy_to_reg (op3);
39882 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39884 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39886 error ("the last argument must be scale 1, 2, 4, 8");
39887 return const0_rtx;
39890 /* Optimize. If mask is known to have all high bits set,
39891 replace op0 with pc_rtx to signal that the instruction
39892 overwrites the whole destination and doesn't use its
39893 previous contents. */
39894 if (optimize)
39896 if (TREE_CODE (arg3) == INTEGER_CST)
39898 if (integer_all_onesp (arg3))
39899 op0 = pc_rtx;
39901 else if (TREE_CODE (arg3) == VECTOR_CST)
39903 unsigned int negative = 0;
39904 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39906 tree cst = VECTOR_CST_ELT (arg3, i);
39907 if (TREE_CODE (cst) == INTEGER_CST
39908 && tree_int_cst_sign_bit (cst))
39909 negative++;
39910 else if (TREE_CODE (cst) == REAL_CST
39911 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39912 negative++;
39914 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39915 op0 = pc_rtx;
39917 else if (TREE_CODE (arg3) == SSA_NAME
39918 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39920 /* Also recognize when the mask is like:
39921 __v2df src = _mm_setzero_pd ();
39922 __v2df mask = _mm_cmpeq_pd (src, src);
39924 __v8sf src = _mm256_setzero_ps ();
39925 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39926 as that is a cheaper way to load all ones into
39927 a register than having to load a constant from
39928 memory. */
39929 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39930 if (is_gimple_call (def_stmt))
39932 tree fndecl = gimple_call_fndecl (def_stmt);
39933 if (fndecl
39934 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39935 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39937 case IX86_BUILTIN_CMPPD:
39938 case IX86_BUILTIN_CMPPS:
39939 case IX86_BUILTIN_CMPPD256:
39940 case IX86_BUILTIN_CMPPS256:
39941 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39942 break;
39943 /* FALLTHRU */
39944 case IX86_BUILTIN_CMPEQPD:
39945 case IX86_BUILTIN_CMPEQPS:
39946 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39947 && initializer_zerop (gimple_call_arg (def_stmt,
39948 1)))
39949 op0 = pc_rtx;
39950 break;
39951 default:
39952 break;
39958 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39959 if (! pat)
39960 return const0_rtx;
39961 emit_insn (pat);
39963 switch (fcode)
39965 case IX86_BUILTIN_GATHER3DIV16SF:
39966 if (target == NULL_RTX)
39967 target = gen_reg_rtx (V8SFmode);
39968 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39969 break;
39970 case IX86_BUILTIN_GATHER3DIV16SI:
39971 if (target == NULL_RTX)
39972 target = gen_reg_rtx (V8SImode);
39973 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39974 break;
39975 case IX86_BUILTIN_GATHER3DIV8SF:
39976 case IX86_BUILTIN_GATHERDIV8SF:
39977 if (target == NULL_RTX)
39978 target = gen_reg_rtx (V4SFmode);
39979 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39980 break;
39981 case IX86_BUILTIN_GATHER3DIV8SI:
39982 case IX86_BUILTIN_GATHERDIV8SI:
39983 if (target == NULL_RTX)
39984 target = gen_reg_rtx (V4SImode);
39985 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39986 break;
39987 default:
39988 target = subtarget;
39989 break;
39991 return target;
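      /* Illustrative sketch, assuming the usual avx2intrin.h intrinsic
         mapping: a masked gather such as

             __m256d v = _mm256_mask_i32gather_pd (src, base, idx, mask, 8);

         arrives here via __builtin_ia32_gathersiv4df; when the mask is known
         to be all ones (see the optimization above), op0 is replaced by
         pc_rtx so the pattern knows the whole destination is overwritten.  */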
39993 scatter_gen:
39994 arg0 = CALL_EXPR_ARG (exp, 0);
39995 arg1 = CALL_EXPR_ARG (exp, 1);
39996 arg2 = CALL_EXPR_ARG (exp, 2);
39997 arg3 = CALL_EXPR_ARG (exp, 3);
39998 arg4 = CALL_EXPR_ARG (exp, 4);
39999 op0 = expand_normal (arg0);
40000 op1 = expand_normal (arg1);
40001 op2 = expand_normal (arg2);
40002 op3 = expand_normal (arg3);
40003 op4 = expand_normal (arg4);
40004 mode1 = insn_data[icode].operand[1].mode;
40005 mode2 = insn_data[icode].operand[2].mode;
40006 mode3 = insn_data[icode].operand[3].mode;
40007 mode4 = insn_data[icode].operand[4].mode;
40009 /* Force the memory operand to use only a base register here; we
40010 don't want to do this for the memory operands of other builtin
40011 functions. */
40012 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40014 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40015 op0 = copy_to_mode_reg (Pmode, op0);
40017 op1 = fixup_modeless_constant (op1, mode1);
40019 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40021 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40022 op1 = copy_to_mode_reg (mode1, op1);
40024 else
40026 op1 = copy_to_reg (op1);
40027 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40030 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40031 op2 = copy_to_mode_reg (mode2, op2);
40033 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40034 op3 = copy_to_mode_reg (mode3, op3);
40036 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40038 error ("the last argument must be scale 1, 2, 4, 8");
40039 return const0_rtx;
40042 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40043 if (! pat)
40044 return const0_rtx;
40046 emit_insn (pat);
40047 return 0;
40049 vec_prefetch_gen:
40050 arg0 = CALL_EXPR_ARG (exp, 0);
40051 arg1 = CALL_EXPR_ARG (exp, 1);
40052 arg2 = CALL_EXPR_ARG (exp, 2);
40053 arg3 = CALL_EXPR_ARG (exp, 3);
40054 arg4 = CALL_EXPR_ARG (exp, 4);
40055 op0 = expand_normal (arg0);
40056 op1 = expand_normal (arg1);
40057 op2 = expand_normal (arg2);
40058 op3 = expand_normal (arg3);
40059 op4 = expand_normal (arg4);
40060 mode0 = insn_data[icode].operand[0].mode;
40061 mode1 = insn_data[icode].operand[1].mode;
40062 mode3 = insn_data[icode].operand[3].mode;
40063 mode4 = insn_data[icode].operand[4].mode;
40065 op0 = fixup_modeless_constant (op0, mode0);
40067 if (GET_MODE (op0) == mode0
40068 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40070 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40071 op0 = copy_to_mode_reg (mode0, op0);
40073 else if (op0 != constm1_rtx)
40075 op0 = copy_to_reg (op0);
40076 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40079 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40080 op1 = copy_to_mode_reg (mode1, op1);
40082 /* Force the memory operand to use only a base register here; we
40083 don't want to do this for the memory operands of other builtin
40084 functions. */
40085 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40087 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40088 op2 = copy_to_mode_reg (Pmode, op2);
40090 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40092 error ("the fourth argument must be scale 1, 2, 4, 8");
40093 return const0_rtx;
40096 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40098 error ("incorrect hint operand");
40099 return const0_rtx;
40102 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40103 if (! pat)
40104 return const0_rtx;
40106 emit_insn (pat);
40108 return 0;
40110 case IX86_BUILTIN_XABORT:
40111 icode = CODE_FOR_xabort;
40112 arg0 = CALL_EXPR_ARG (exp, 0);
40113 op0 = expand_normal (arg0);
40114 mode0 = insn_data[icode].operand[0].mode;
40115 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40117 error ("the xabort's argument must be an 8-bit immediate");
40118 return const0_rtx;
40120 emit_insn (gen_xabort (op0));
40121 return 0;
40123 default:
40124 break;
40127 for (i = 0, d = bdesc_special_args;
40128 i < ARRAY_SIZE (bdesc_special_args);
40129 i++, d++)
40130 if (d->code == fcode)
40131 return ix86_expand_special_args_builtin (d, exp, target);
40133 for (i = 0, d = bdesc_args;
40134 i < ARRAY_SIZE (bdesc_args);
40135 i++, d++)
40136 if (d->code == fcode)
40137 switch (fcode)
40139 case IX86_BUILTIN_FABSQ:
40140 case IX86_BUILTIN_COPYSIGNQ:
40141 if (!TARGET_SSE)
40142 /* Emit a normal call if SSE isn't available. */
40143 return expand_call (exp, target, ignore);
40144 default:
40145 return ix86_expand_args_builtin (d, exp, target);
40148 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40149 if (d->code == fcode)
40150 return ix86_expand_sse_comi (d, exp, target);
40152 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40153 if (d->code == fcode)
40154 return ix86_expand_round_builtin (d, exp, target);
40156 for (i = 0, d = bdesc_pcmpestr;
40157 i < ARRAY_SIZE (bdesc_pcmpestr);
40158 i++, d++)
40159 if (d->code == fcode)
40160 return ix86_expand_sse_pcmpestr (d, exp, target);
40162 for (i = 0, d = bdesc_pcmpistr;
40163 i < ARRAY_SIZE (bdesc_pcmpistr);
40164 i++, d++)
40165 if (d->code == fcode)
40166 return ix86_expand_sse_pcmpistr (d, exp, target);
40168 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40169 if (d->code == fcode)
40170 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40171 (enum ix86_builtin_func_type)
40172 d->flag, d->comparison);
40174 gcc_unreachable ();
40177 /* This returns the target-specific builtin with code CODE if
40178 current_function_decl is allowed to use this builtin, which is checked
40179 using its ISA flags. Returns NULL_TREE otherwise. */
40181 static tree ix86_get_builtin (enum ix86_builtins code)
40183 struct cl_target_option *opts;
40184 tree target_tree = NULL_TREE;
40186 /* Determine the isa flags of current_function_decl. */
40188 if (current_function_decl)
40189 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40191 if (target_tree == NULL)
40192 target_tree = target_option_default_node;
40194 opts = TREE_TARGET_OPTION (target_tree);
40196 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40197 return ix86_builtin_decl (code, true);
40198 else
40199 return NULL_TREE;
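/* For illustration: the gather builtins are registered under the AVX2 ISA
   mask, so if current_function_decl is compiled without AVX2,
   ix86_get_builtin (IX86_BUILTIN_GATHERSIV4SF) returns NULL_TREE even though
   the builtin decl itself exists.  */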
40202 /* Return the function decl for the target-specific builtin
40203 corresponding to the MPX builtin passed in FCODE. */
40204 static tree
40205 ix86_builtin_mpx_function (unsigned fcode)
40207 switch (fcode)
40209 case BUILT_IN_CHKP_BNDMK:
40210 return ix86_builtins[IX86_BUILTIN_BNDMK];
40212 case BUILT_IN_CHKP_BNDSTX:
40213 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40215 case BUILT_IN_CHKP_BNDLDX:
40216 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40218 case BUILT_IN_CHKP_BNDCL:
40219 return ix86_builtins[IX86_BUILTIN_BNDCL];
40221 case BUILT_IN_CHKP_BNDCU:
40222 return ix86_builtins[IX86_BUILTIN_BNDCU];
40224 case BUILT_IN_CHKP_BNDRET:
40225 return ix86_builtins[IX86_BUILTIN_BNDRET];
40227 case BUILT_IN_CHKP_INTERSECT:
40228 return ix86_builtins[IX86_BUILTIN_BNDINT];
40230 case BUILT_IN_CHKP_NARROW:
40231 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40233 case BUILT_IN_CHKP_SIZEOF:
40234 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40236 case BUILT_IN_CHKP_EXTRACT_LOWER:
40237 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40239 case BUILT_IN_CHKP_EXTRACT_UPPER:
40240 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40242 default:
40243 return NULL_TREE;
40246 gcc_unreachable ();
40249 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40251 Return an address to be used to load/store bounds for pointer
40252 passed in SLOT.
40254 SLOT_NO is an integer constant holding number of a target
40255 dependent special slot to be used in case SLOT is not a memory.
40257 SPECIAL_BASE is a pointer to be used as a base of fake address
40258 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40259 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40261 static rtx
40262 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40264 rtx addr = NULL;
40266 /* NULL slot means we pass bounds for pointer not passed to the
40267 function at all. Register slot means we pass pointer in a
40268 register. In both these cases bounds are passed via Bounds
40269 Table. Since we do not have actual pointer stored in memory,
40270 we have to use fake addresses to access Bounds Table. We
40271 start with (special_base - sizeof (void*)) and decrease this
40272 address by pointer size to get addresses for other slots. */
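/* For illustration, with 64-bit Pmode this computes special_base - 8 for
   slot 0, special_base - 16 for slot 1, and so on.  */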
40273 if (!slot || REG_P (slot))
40275 gcc_assert (CONST_INT_P (slot_no));
40276 addr = plus_constant (Pmode, special_base,
40277 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40279 /* If pointer is passed in a memory then its address is used to
40280 access Bounds Table. */
40281 else if (MEM_P (slot))
40283 addr = XEXP (slot, 0);
40284 if (!register_operand (addr, Pmode))
40285 addr = copy_addr_to_reg (addr);
40287 else
40288 gcc_unreachable ();
40290 return addr;
40293 /* Expand pass uses this hook to load bounds for function parameter
40294 PTR passed in SLOT in case its bounds are not passed in a register.
40296 If SLOT is a memory, then bounds are loaded as for regular pointer
40297 loaded from memory. PTR may be NULL in case SLOT is a memory.
40298 In such case value of PTR (if required) may be loaded from SLOT.
40300 If SLOT is NULL or a register then SLOT_NO is an integer constant
40301 holding number of the target dependent special slot which should be
40302 used to obtain bounds.
40304 Return loaded bounds. */
40306 static rtx
40307 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40309 rtx reg = gen_reg_rtx (BNDmode);
40310 rtx addr;
40312 /* Get address to be used to access Bounds Table. Special slots start
40313 at the location of return address of the current function. */
40314 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40316 /* Load pointer value from a memory if we don't have it. */
40317 if (!ptr)
40319 gcc_assert (MEM_P (slot));
40320 ptr = copy_addr_to_reg (slot);
40323 emit_insn (BNDmode == BND64mode
40324 ? gen_bnd64_ldx (reg, addr, ptr)
40325 : gen_bnd32_ldx (reg, addr, ptr));
40327 return reg;
40330 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40331 passed in SLOT in case BOUNDS are not passed in a register.
40333 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40334 stored in memory. PTR may be NULL in case SLOT is a memory.
40335 In such case value of PTR (if required) may be loaded from SLOT.
40337 If SLOT is NULL or a register then SLOT_NO is an integer constant
40338 holding number of the target dependent special slot which should be
40339 used to store BOUNDS. */
40341 static void
40342 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40344 rtx addr;
40346 /* Get address to be used to access Bounds Table. Special slots start
40347 at the location of return address of a called function. */
40348 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40350 /* Load pointer value from a memory if we don't have it. */
40351 if (!ptr)
40353 gcc_assert (MEM_P (slot));
40354 ptr = copy_addr_to_reg (slot);
40357 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40358 if (!register_operand (bounds, BNDmode))
40359 bounds = copy_to_mode_reg (BNDmode, bounds);
40361 emit_insn (BNDmode == BND64mode
40362 ? gen_bnd64_stx (addr, ptr, bounds)
40363 : gen_bnd32_stx (addr, ptr, bounds));
40366 /* Load and return bounds returned by function in SLOT. */
40368 static rtx
40369 ix86_load_returned_bounds (rtx slot)
40371 rtx res;
40373 gcc_assert (REG_P (slot));
40374 res = gen_reg_rtx (BNDmode);
40375 emit_move_insn (res, slot);
40377 return res;
40380 /* Store BOUNDS returned by function into SLOT. */
40382 static void
40383 ix86_store_returned_bounds (rtx slot, rtx bounds)
40385 gcc_assert (REG_P (slot));
40386 emit_move_insn (slot, bounds);
40389 /* Returns a function decl for a vectorized version of the builtin function
40390 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40391 if it is not available. */
40393 static tree
40394 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40395 tree type_in)
40397 machine_mode in_mode, out_mode;
40398 int in_n, out_n;
40399 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40401 if (TREE_CODE (type_out) != VECTOR_TYPE
40402 || TREE_CODE (type_in) != VECTOR_TYPE
40403 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40404 return NULL_TREE;
40406 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40407 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40408 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40409 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40411 switch (fn)
40413 case BUILT_IN_SQRT:
40414 if (out_mode == DFmode && in_mode == DFmode)
40416 if (out_n == 2 && in_n == 2)
40417 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40418 else if (out_n == 4 && in_n == 4)
40419 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40420 else if (out_n == 8 && in_n == 8)
40421 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40423 break;
40425 case BUILT_IN_EXP2F:
40426 if (out_mode == SFmode && in_mode == SFmode)
40428 if (out_n == 16 && in_n == 16)
40429 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40431 break;
40433 case BUILT_IN_SQRTF:
40434 if (out_mode == SFmode && in_mode == SFmode)
40436 if (out_n == 4 && in_n == 4)
40437 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40438 else if (out_n == 8 && in_n == 8)
40439 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40440 else if (out_n == 16 && in_n == 16)
40441 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40443 break;
40445 case BUILT_IN_IFLOOR:
40446 case BUILT_IN_LFLOOR:
40447 case BUILT_IN_LLFLOOR:
40448 /* The round insn does not trap on denormals. */
40449 if (flag_trapping_math || !TARGET_ROUND)
40450 break;
40452 if (out_mode == SImode && in_mode == DFmode)
40454 if (out_n == 4 && in_n == 2)
40455 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40456 else if (out_n == 8 && in_n == 4)
40457 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40458 else if (out_n == 16 && in_n == 8)
40459 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40461 break;
40463 case BUILT_IN_IFLOORF:
40464 case BUILT_IN_LFLOORF:
40465 case BUILT_IN_LLFLOORF:
40466 /* The round insn does not trap on denormals. */
40467 if (flag_trapping_math || !TARGET_ROUND)
40468 break;
40470 if (out_mode == SImode && in_mode == SFmode)
40472 if (out_n == 4 && in_n == 4)
40473 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40474 else if (out_n == 8 && in_n == 8)
40475 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40477 break;
40479 case BUILT_IN_ICEIL:
40480 case BUILT_IN_LCEIL:
40481 case BUILT_IN_LLCEIL:
40482 /* The round insn does not trap on denormals. */
40483 if (flag_trapping_math || !TARGET_ROUND)
40484 break;
40486 if (out_mode == SImode && in_mode == DFmode)
40488 if (out_n == 4 && in_n == 2)
40489 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40490 else if (out_n == 8 && in_n == 4)
40491 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40492 else if (out_n == 16 && in_n == 8)
40493 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40495 break;
40497 case BUILT_IN_ICEILF:
40498 case BUILT_IN_LCEILF:
40499 case BUILT_IN_LLCEILF:
40500 /* The round insn does not trap on denormals. */
40501 if (flag_trapping_math || !TARGET_ROUND)
40502 break;
40504 if (out_mode == SImode && in_mode == SFmode)
40506 if (out_n == 4 && in_n == 4)
40507 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40508 else if (out_n == 8 && in_n == 8)
40509 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40511 break;
40513 case BUILT_IN_IRINT:
40514 case BUILT_IN_LRINT:
40515 case BUILT_IN_LLRINT:
40516 if (out_mode == SImode && in_mode == DFmode)
40518 if (out_n == 4 && in_n == 2)
40519 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40520 else if (out_n == 8 && in_n == 4)
40521 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40523 break;
40525 case BUILT_IN_IRINTF:
40526 case BUILT_IN_LRINTF:
40527 case BUILT_IN_LLRINTF:
40528 if (out_mode == SImode && in_mode == SFmode)
40530 if (out_n == 4 && in_n == 4)
40531 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40532 else if (out_n == 8 && in_n == 8)
40533 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40535 break;
40537 case BUILT_IN_IROUND:
40538 case BUILT_IN_LROUND:
40539 case BUILT_IN_LLROUND:
40540 /* The round insn does not trap on denormals. */
40541 if (flag_trapping_math || !TARGET_ROUND)
40542 break;
40544 if (out_mode == SImode && in_mode == DFmode)
40546 if (out_n == 4 && in_n == 2)
40547 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40548 else if (out_n == 8 && in_n == 4)
40549 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40550 else if (out_n == 16 && in_n == 8)
40551 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40553 break;
40555 case BUILT_IN_IROUNDF:
40556 case BUILT_IN_LROUNDF:
40557 case BUILT_IN_LLROUNDF:
40558 /* The round insn does not trap on denormals. */
40559 if (flag_trapping_math || !TARGET_ROUND)
40560 break;
40562 if (out_mode == SImode && in_mode == SFmode)
40564 if (out_n == 4 && in_n == 4)
40565 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40566 else if (out_n == 8 && in_n == 8)
40567 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40569 break;
40571 case BUILT_IN_COPYSIGN:
40572 if (out_mode == DFmode && in_mode == DFmode)
40574 if (out_n == 2 && in_n == 2)
40575 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40576 else if (out_n == 4 && in_n == 4)
40577 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40578 else if (out_n == 8 && in_n == 8)
40579 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40581 break;
40583 case BUILT_IN_COPYSIGNF:
40584 if (out_mode == SFmode && in_mode == SFmode)
40586 if (out_n == 4 && in_n == 4)
40587 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40588 else if (out_n == 8 && in_n == 8)
40589 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40590 else if (out_n == 16 && in_n == 16)
40591 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40593 break;
40595 case BUILT_IN_FLOOR:
40596 /* The round insn does not trap on denormals. */
40597 if (flag_trapping_math || !TARGET_ROUND)
40598 break;
40600 if (out_mode == DFmode && in_mode == DFmode)
40602 if (out_n == 2 && in_n == 2)
40603 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40604 else if (out_n == 4 && in_n == 4)
40605 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40607 break;
40609 case BUILT_IN_FLOORF:
40610 /* The round insn does not trap on denormals. */
40611 if (flag_trapping_math || !TARGET_ROUND)
40612 break;
40614 if (out_mode == SFmode && in_mode == SFmode)
40616 if (out_n == 4 && in_n == 4)
40617 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40618 else if (out_n == 8 && in_n == 8)
40619 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40621 break;
40623 case BUILT_IN_CEIL:
40624 /* The round insn does not trap on denormals. */
40625 if (flag_trapping_math || !TARGET_ROUND)
40626 break;
40628 if (out_mode == DFmode && in_mode == DFmode)
40630 if (out_n == 2 && in_n == 2)
40631 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40632 else if (out_n == 4 && in_n == 4)
40633 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40635 break;
40637 case BUILT_IN_CEILF:
40638 /* The round insn does not trap on denormals. */
40639 if (flag_trapping_math || !TARGET_ROUND)
40640 break;
40642 if (out_mode == SFmode && in_mode == SFmode)
40644 if (out_n == 4 && in_n == 4)
40645 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40646 else if (out_n == 8 && in_n == 8)
40647 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40649 break;
40651 case BUILT_IN_TRUNC:
40652 /* The round insn does not trap on denormals. */
40653 if (flag_trapping_math || !TARGET_ROUND)
40654 break;
40656 if (out_mode == DFmode && in_mode == DFmode)
40658 if (out_n == 2 && in_n == 2)
40659 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40660 else if (out_n == 4 && in_n == 4)
40661 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40663 break;
40665 case BUILT_IN_TRUNCF:
40666 /* The round insn does not trap on denormals. */
40667 if (flag_trapping_math || !TARGET_ROUND)
40668 break;
40670 if (out_mode == SFmode && in_mode == SFmode)
40672 if (out_n == 4 && in_n == 4)
40673 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40674 else if (out_n == 8 && in_n == 8)
40675 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40677 break;
40679 case BUILT_IN_RINT:
40680 /* The round insn does not trap on denormals. */
40681 if (flag_trapping_math || !TARGET_ROUND)
40682 break;
40684 if (out_mode == DFmode && in_mode == DFmode)
40686 if (out_n == 2 && in_n == 2)
40687 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40688 else if (out_n == 4 && in_n == 4)
40689 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40691 break;
40693 case BUILT_IN_RINTF:
40694 /* The round insn does not trap on denormals. */
40695 if (flag_trapping_math || !TARGET_ROUND)
40696 break;
40698 if (out_mode == SFmode && in_mode == SFmode)
40700 if (out_n == 4 && in_n == 4)
40701 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40702 else if (out_n == 8 && in_n == 8)
40703 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40705 break;
40707 case BUILT_IN_ROUND:
40708 /* The round insn does not trap on denormals. */
40709 if (flag_trapping_math || !TARGET_ROUND)
40710 break;
40712 if (out_mode == DFmode && in_mode == DFmode)
40714 if (out_n == 2 && in_n == 2)
40715 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40716 else if (out_n == 4 && in_n == 4)
40717 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40719 break;
40721 case BUILT_IN_ROUNDF:
40722 /* The round insn does not trap on denormals. */
40723 if (flag_trapping_math || !TARGET_ROUND)
40724 break;
40726 if (out_mode == SFmode && in_mode == SFmode)
40728 if (out_n == 4 && in_n == 4)
40729 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40730 else if (out_n == 8 && in_n == 8)
40731 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40733 break;
40735 case BUILT_IN_FMA:
40736 if (out_mode == DFmode && in_mode == DFmode)
40738 if (out_n == 2 && in_n == 2)
40739 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40740 if (out_n == 4 && in_n == 4)
40741 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40743 break;
40745 case BUILT_IN_FMAF:
40746 if (out_mode == SFmode && in_mode == SFmode)
40748 if (out_n == 4 && in_n == 4)
40749 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40750 if (out_n == 8 && in_n == 8)
40751 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40753 break;
40755 default:
40756 break;
40759 /* Dispatch to a handler for a vectorization library. */
40760 if (ix86_veclib_handler)
40761 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40762 type_in);
40764 return NULL_TREE;
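/* Worked example: a sqrt call vectorized with V2DF input and output
   (BUILT_IN_SQRT, DFmode, out_n == in_n == 2) maps to the decl of
   IX86_BUILTIN_SQRTPD above, assuming the current function's ISA flags
   allow that builtin; otherwise NULL_TREE is returned.  */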
40767 /* Handler for an SVML-style interface to
40768 a library with vectorized intrinsics. */
40770 static tree
40771 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40773 char name[20];
40774 tree fntype, new_fndecl, args;
40775 unsigned arity;
40776 const char *bname;
40777 machine_mode el_mode, in_mode;
40778 int n, in_n;
40780 /* The SVML is suitable for unsafe math only. */
40781 if (!flag_unsafe_math_optimizations)
40782 return NULL_TREE;
40784 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40785 n = TYPE_VECTOR_SUBPARTS (type_out);
40786 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40787 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40788 if (el_mode != in_mode
40789 || n != in_n)
40790 return NULL_TREE;
40792 switch (fn)
40794 case BUILT_IN_EXP:
40795 case BUILT_IN_LOG:
40796 case BUILT_IN_LOG10:
40797 case BUILT_IN_POW:
40798 case BUILT_IN_TANH:
40799 case BUILT_IN_TAN:
40800 case BUILT_IN_ATAN:
40801 case BUILT_IN_ATAN2:
40802 case BUILT_IN_ATANH:
40803 case BUILT_IN_CBRT:
40804 case BUILT_IN_SINH:
40805 case BUILT_IN_SIN:
40806 case BUILT_IN_ASINH:
40807 case BUILT_IN_ASIN:
40808 case BUILT_IN_COSH:
40809 case BUILT_IN_COS:
40810 case BUILT_IN_ACOSH:
40811 case BUILT_IN_ACOS:
40812 if (el_mode != DFmode || n != 2)
40813 return NULL_TREE;
40814 break;
40816 case BUILT_IN_EXPF:
40817 case BUILT_IN_LOGF:
40818 case BUILT_IN_LOG10F:
40819 case BUILT_IN_POWF:
40820 case BUILT_IN_TANHF:
40821 case BUILT_IN_TANF:
40822 case BUILT_IN_ATANF:
40823 case BUILT_IN_ATAN2F:
40824 case BUILT_IN_ATANHF:
40825 case BUILT_IN_CBRTF:
40826 case BUILT_IN_SINHF:
40827 case BUILT_IN_SINF:
40828 case BUILT_IN_ASINHF:
40829 case BUILT_IN_ASINF:
40830 case BUILT_IN_COSHF:
40831 case BUILT_IN_COSF:
40832 case BUILT_IN_ACOSHF:
40833 case BUILT_IN_ACOSF:
40834 if (el_mode != SFmode || n != 4)
40835 return NULL_TREE;
40836 break;
40838 default:
40839 return NULL_TREE;
40842 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40844 if (fn == BUILT_IN_LOGF)
40845 strcpy (name, "vmlsLn4");
40846 else if (fn == BUILT_IN_LOG)
40847 strcpy (name, "vmldLn2");
40848 else if (n == 4)
40850 sprintf (name, "vmls%s", bname+10);
40851 name[strlen (name)-1] = '4';
40853 else
40854 sprintf (name, "vmld%s2", bname+10);
40856 /* Convert to uppercase. */
40857 name[4] &= ~0x20;
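/* For illustration: BUILT_IN_SINF ("__builtin_sinf") with 4-element vectors
   becomes "vmlsSin4" and BUILT_IN_SIN with 2-element vectors becomes
   "vmldSin2", while the log functions were special-cased above as
   "vmlsLn4" / "vmldLn2".  */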
40859 arity = 0;
40860 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40861 args;
40862 args = TREE_CHAIN (args))
40863 arity++;
40865 if (arity == 1)
40866 fntype = build_function_type_list (type_out, type_in, NULL);
40867 else
40868 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40870 /* Build a function declaration for the vectorized function. */
40871 new_fndecl = build_decl (BUILTINS_LOCATION,
40872 FUNCTION_DECL, get_identifier (name), fntype);
40873 TREE_PUBLIC (new_fndecl) = 1;
40874 DECL_EXTERNAL (new_fndecl) = 1;
40875 DECL_IS_NOVOPS (new_fndecl) = 1;
40876 TREE_READONLY (new_fndecl) = 1;
40878 return new_fndecl;
40881 /* Handler for an ACML-style interface to
40882 a library with vectorized intrinsics. */
40884 static tree
40885 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
40887 char name[20] = "__vr.._";
40888 tree fntype, new_fndecl, args;
40889 unsigned arity;
40890 const char *bname;
40891 machine_mode el_mode, in_mode;
40892 int n, in_n;
40894 /* The ACML is 64-bit only and suitable for unsafe math only, as
40895 it does not correctly support parts of IEEE with the required
40896 precision such as denormals. */
40897 if (!TARGET_64BIT
40898 || !flag_unsafe_math_optimizations)
40899 return NULL_TREE;
40901 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40902 n = TYPE_VECTOR_SUBPARTS (type_out);
40903 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40904 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40905 if (el_mode != in_mode
40906 || n != in_n)
40907 return NULL_TREE;
40909 switch (fn)
40911 case BUILT_IN_SIN:
40912 case BUILT_IN_COS:
40913 case BUILT_IN_EXP:
40914 case BUILT_IN_LOG:
40915 case BUILT_IN_LOG2:
40916 case BUILT_IN_LOG10:
40917 name[4] = 'd';
40918 name[5] = '2';
40919 if (el_mode != DFmode
40920 || n != 2)
40921 return NULL_TREE;
40922 break;
40924 case BUILT_IN_SINF:
40925 case BUILT_IN_COSF:
40926 case BUILT_IN_EXPF:
40927 case BUILT_IN_POWF:
40928 case BUILT_IN_LOGF:
40929 case BUILT_IN_LOG2F:
40930 case BUILT_IN_LOG10F:
40931 name[4] = 's';
40932 name[5] = '4';
40933 if (el_mode != SFmode
40934 || n != 4)
40935 return NULL_TREE;
40936 break;
40938 default:
40939 return NULL_TREE;
40942 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40943 sprintf (name + 7, "%s", bname+10);
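/* For illustration: BUILT_IN_SIN with 2-element DFmode vectors produces
   "__vrd2_sin", and BUILT_IN_LOG10F with 4-element SFmode vectors produces
   "__vrs4_log10f".  */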
40945 arity = 0;
40946 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40947 args;
40948 args = TREE_CHAIN (args))
40949 arity++;
40951 if (arity == 1)
40952 fntype = build_function_type_list (type_out, type_in, NULL);
40953 else
40954 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40956 /* Build a function declaration for the vectorized function. */
40957 new_fndecl = build_decl (BUILTINS_LOCATION,
40958 FUNCTION_DECL, get_identifier (name), fntype);
40959 TREE_PUBLIC (new_fndecl) = 1;
40960 DECL_EXTERNAL (new_fndecl) = 1;
40961 DECL_IS_NOVOPS (new_fndecl) = 1;
40962 TREE_READONLY (new_fndecl) = 1;
40964 return new_fndecl;
40967 /* Returns a decl of a function that implements gather load with
40968 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40969 Return NULL_TREE if it is not available. */
40971 static tree
40972 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40973 const_tree index_type, int scale)
40975 bool si;
40976 enum ix86_builtins code;
40978 if (! TARGET_AVX2)
40979 return NULL_TREE;
40981 if ((TREE_CODE (index_type) != INTEGER_TYPE
40982 && !POINTER_TYPE_P (index_type))
40983 || (TYPE_MODE (index_type) != SImode
40984 && TYPE_MODE (index_type) != DImode))
40985 return NULL_TREE;
40987 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40988 return NULL_TREE;
40990 /* v*gather* insn sign extends index to pointer mode. */
40991 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40992 && TYPE_UNSIGNED (index_type))
40993 return NULL_TREE;
40995 if (scale <= 0
40996 || scale > 8
40997 || (scale & (scale - 1)) != 0)
40998 return NULL_TREE;
41000 si = TYPE_MODE (index_type) == SImode;
41001 switch (TYPE_MODE (mem_vectype))
41003 case V2DFmode:
41004 if (TARGET_AVX512VL)
41005 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41006 else
41007 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41008 break;
41009 case V4DFmode:
41010 if (TARGET_AVX512VL)
41011 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41012 else
41013 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41014 break;
41015 case V2DImode:
41016 if (TARGET_AVX512VL)
41017 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41018 else
41019 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41020 break;
41021 case V4DImode:
41022 if (TARGET_AVX512VL)
41023 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41024 else
41025 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41026 break;
41027 case V4SFmode:
41028 if (TARGET_AVX512VL)
41029 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41030 else
41031 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41032 break;
41033 case V8SFmode:
41034 if (TARGET_AVX512VL)
41035 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41036 else
41037 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41038 break;
41039 case V4SImode:
41040 if (TARGET_AVX512VL)
41041 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41042 else
41043 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41044 break;
41045 case V8SImode:
41046 if (TARGET_AVX512VL)
41047 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41048 else
41049 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41050 break;
41051 case V8DFmode:
41052 if (TARGET_AVX512F)
41053 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41054 else
41055 return NULL_TREE;
41056 break;
41057 case V8DImode:
41058 if (TARGET_AVX512F)
41059 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41060 else
41061 return NULL_TREE;
41062 break;
41063 case V16SFmode:
41064 if (TARGET_AVX512F)
41065 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41066 else
41067 return NULL_TREE;
41068 break;
41069 case V16SImode:
41070 if (TARGET_AVX512F)
41071 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41072 else
41073 return NULL_TREE;
41074 break;
41075 default:
41076 return NULL_TREE;
41079 return ix86_get_builtin (code);
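/* For illustration: a V4SF gather with SImode indices and scale 4 on a plain
   AVX2 target selects IX86_BUILTIN_GATHERSIV4SF (the AVX512VL variant is
   chosen when available), while a non-power-of-two scale such as 3, or an
   unsigned SImode index on a 64-bit target, yields NULL_TREE.  */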
41082 /* Returns the decl of a target-specific builtin that implements
41083 the reciprocal of the function FN, or NULL_TREE if it is not available. */
41085 static tree
41086 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41088 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41089 && flag_finite_math_only && !flag_trapping_math
41090 && flag_unsafe_math_optimizations))
41091 return NULL_TREE;
41093 if (md_fn)
41094 /* Machine dependent builtins. */
41095 switch (fn)
41097 /* Vectorized version of sqrt to rsqrt conversion. */
41098 case IX86_BUILTIN_SQRTPS_NR:
41099 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41101 case IX86_BUILTIN_SQRTPS_NR256:
41102 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41104 default:
41105 return NULL_TREE;
41107 else
41108 /* Normal builtins. */
41109 switch (fn)
41111 /* Sqrt to rsqrt conversion. */
41112 case BUILT_IN_SQRTF:
41113 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41115 default:
41116 return NULL_TREE;
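/* For illustration: under the flags checked above (SSE math, finite math
   only, no trapping math, unsafe optimizations), the vectorized
   IX86_BUILTIN_SQRTPS_NR maps to IX86_BUILTIN_RSQRTPS_NR and the scalar
   BUILT_IN_SQRTF maps to IX86_BUILTIN_RSQRTF.  */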
41120 /* Helper for avx_vpermilps256_operand et al. This is also used by
41121 the expansion functions to turn the parallel back into a mask.
41122 The return value is 0 for no match and the imm8+1 for a match. */
41124 int
41125 avx_vpermilp_parallel (rtx par, machine_mode mode)
41127 unsigned i, nelt = GET_MODE_NUNITS (mode);
41128 unsigned mask = 0;
41129 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41131 if (XVECLEN (par, 0) != (int) nelt)
41132 return 0;
41134 /* Validate that all of the elements are constants, and not totally
41135 out of range. Copy the data into an integral array to make the
41136 subsequent checks easier. */
41137 for (i = 0; i < nelt; ++i)
41139 rtx er = XVECEXP (par, 0, i);
41140 unsigned HOST_WIDE_INT ei;
41142 if (!CONST_INT_P (er))
41143 return 0;
41144 ei = INTVAL (er);
41145 if (ei >= nelt)
41146 return 0;
41147 ipar[i] = ei;
41150 switch (mode)
41152 case V8DFmode:
41153 /* In the 512-bit DFmode case, we can only move elements within
41154 a 128-bit lane. First fill the second part of the mask,
41155 then fallthru. */
41156 for (i = 4; i < 6; ++i)
41158 if (ipar[i] < 4 || ipar[i] >= 6)
41159 return 0;
41160 mask |= (ipar[i] - 4) << i;
41162 for (i = 6; i < 8; ++i)
41164 if (ipar[i] < 6)
41165 return 0;
41166 mask |= (ipar[i] - 6) << i;
41168 /* FALLTHRU */
41170 case V4DFmode:
41171 /* In the 256-bit DFmode case, we can only move elements within
41172 a 128-bit lane. */
41173 for (i = 0; i < 2; ++i)
41175 if (ipar[i] >= 2)
41176 return 0;
41177 mask |= ipar[i] << i;
41179 for (i = 2; i < 4; ++i)
41181 if (ipar[i] < 2)
41182 return 0;
41183 mask |= (ipar[i] - 2) << i;
41185 break;
41187 case V16SFmode:
41188 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41189 must mirror the permutation in the lower 256 bits. */
41190 for (i = 0; i < 8; ++i)
41191 if (ipar[i] + 8 != ipar[i + 8])
41192 return 0;
41193 /* FALLTHRU */
41195 case V8SFmode:
41196 /* In the 256-bit SFmode case, we have full freedom of
41197 movement within the low 128-bit lane, but the high 128-bit
41198 lane must mirror the exact same pattern. */
41199 for (i = 0; i < 4; ++i)
41200 if (ipar[i] + 4 != ipar[i + 4])
41201 return 0;
41202 nelt = 4;
41203 /* FALLTHRU */
41205 case V2DFmode:
41206 case V4SFmode:
41207 /* In the 128-bit case, we've full freedom in the placement of
41208 the elements from the source operand. */
41209 for (i = 0; i < nelt; ++i)
41210 mask |= ipar[i] << (i * (nelt / 2));
41211 break;
41213 default:
41214 gcc_unreachable ();
41217 /* Make sure success has a non-zero value by adding one. */
41218 return mask + 1;
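/* Worked example (V4SFmode): the parallel (1 0 3 2) packs two bits per
   element, giving mask 0xb1; the function returns 0xb2 and the caller
   subtracts one to recover the immediate.  */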
41221 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41222 the expansion functions to turn the parallel back into a mask.
41223 The return value is 0 for no match and the imm8+1 for a match. */
41225 int
41226 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41228 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41229 unsigned mask = 0;
41230 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41232 if (XVECLEN (par, 0) != (int) nelt)
41233 return 0;
41235 /* Validate that all of the elements are constants, and not totally
41236 out of range. Copy the data into an integral array to make the
41237 subsequent checks easier. */
41238 for (i = 0; i < nelt; ++i)
41240 rtx er = XVECEXP (par, 0, i);
41241 unsigned HOST_WIDE_INT ei;
41243 if (!CONST_INT_P (er))
41244 return 0;
41245 ei = INTVAL (er);
41246 if (ei >= 2 * nelt)
41247 return 0;
41248 ipar[i] = ei;
41251 /* Validate that the halves of the permute are halves. */
41252 for (i = 0; i < nelt2 - 1; ++i)
41253 if (ipar[i] + 1 != ipar[i + 1])
41254 return 0;
41255 for (i = nelt2; i < nelt - 1; ++i)
41256 if (ipar[i] + 1 != ipar[i + 1])
41257 return 0;
41259 /* Reconstruct the mask. */
41260 for (i = 0; i < 2; ++i)
41262 unsigned e = ipar[i * nelt2];
41263 if (e % nelt2)
41264 return 0;
41265 e /= nelt2;
41266 mask |= e << (i * 4);
41269 /* Make sure success has a non-zero value by adding one. */
41270 return mask + 1;
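/* Worked example (V8SFmode): a parallel selecting elements 4..7 (the high
   half of the first operand) followed by 8..11 (the low half of the second
   operand) yields lane selectors 1 and 2, i.e. mask 0x21; the function
   returns 0x22.  */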
41273 /* Return a register priority for hard reg REGNO. */
41274 static int
41275 ix86_register_priority (int hard_regno)
41277 /* ebp and r13 as the base always want a displacement, r12 as the
41278 base always wants an index. So discourage their use in an
41279 address. */
41280 if (hard_regno == R12_REG || hard_regno == R13_REG)
41281 return 0;
41282 if (hard_regno == BP_REG)
41283 return 1;
41284 /* New x86-64 int registers result in bigger code size. Discourage
41285 them. */
41286 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41287 return 2;
41288 /* New x86-64 SSE registers result in bigger code size. Discourage
41289 them. */
41290 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41291 return 2;
41292 /* Usage of AX register results in smaller code. Prefer it. */
41293 if (hard_regno == AX_REG)
41294 return 4;
41295 return 3;
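/* In summary: ax gets priority 4, most other registers 3, the REX integer
   and SSE registers 2, bp 1, and r12/r13 0; the allocator prefers
   higher-priority registers when other costs are equal.  */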
41298 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41300 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41301 QImode must go into class Q_REGS.
41302 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41303 movdf to do mem-to-mem moves through integer regs. */
41305 static reg_class_t
41306 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41308 machine_mode mode = GET_MODE (x);
41310 /* We're only allowed to return a subclass of CLASS. Many of the
41311 following checks fail for NO_REGS, so eliminate that early. */
41312 if (regclass == NO_REGS)
41313 return NO_REGS;
41315 /* All classes can load zeros. */
41316 if (x == CONST0_RTX (mode))
41317 return regclass;
41319 /* Force constants into memory if we are loading a (nonzero) constant into
41320 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41321 instructions to load from a constant. */
41322 if (CONSTANT_P (x)
41323 && (MAYBE_MMX_CLASS_P (regclass)
41324 || MAYBE_SSE_CLASS_P (regclass)
41325 || MAYBE_MASK_CLASS_P (regclass)))
41326 return NO_REGS;
41328 /* Prefer SSE regs only, if we can use them for math. */
41329 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41330 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41332 /* Floating-point constants need more complex checks. */
41333 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41335 /* General regs can load everything. */
41336 if (reg_class_subset_p (regclass, GENERAL_REGS))
41337 return regclass;
41339 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41340 zero above. We only want to wind up preferring 80387 registers if
41341 we plan on doing computation with them. */
41342 if (TARGET_80387
41343 && standard_80387_constant_p (x) > 0)
41345 /* Limit class to non-sse. */
41346 if (regclass == FLOAT_SSE_REGS)
41347 return FLOAT_REGS;
41348 if (regclass == FP_TOP_SSE_REGS)
41349 return FP_TOP_REG;
41350 if (regclass == FP_SECOND_SSE_REGS)
41351 return FP_SECOND_REG;
41352 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41353 return regclass;
41356 return NO_REGS;
41359 /* Generally when we see PLUS here, it's the function invariant
41360 (plus soft-fp const_int). Which can only be computed into general
41361 regs. */
41362 if (GET_CODE (x) == PLUS)
41363 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41365 /* QImode constants are easy to load, but non-constant QImode data
41366 must go into Q_REGS. */
41367 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41369 if (reg_class_subset_p (regclass, Q_REGS))
41370 return regclass;
41371 if (reg_class_subset_p (Q_REGS, regclass))
41372 return Q_REGS;
41373 return NO_REGS;
41376 return regclass;
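/* For example, on an x87-math target, loading 1.0 into FLOAT_REGS keeps
   FLOAT_REGS because standard_80387_constant_p accepts it (fld1 can
   materialize it), whereas an arbitrary FP constant falls through to NO_REGS
   and is therefore forced into the constant pool.  */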
41379 /* Discourage putting floating-point values in SSE registers unless
41380 SSE math is being used, and likewise for the 387 registers. */
41381 static reg_class_t
41382 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41384 machine_mode mode = GET_MODE (x);
41386 /* Restrict the output reload class to the register bank that we are doing
41387 math on. If we would like not to return a subset of CLASS, reject this
41388 alternative: if reload cannot do this, it will still use its choice. */
41389 mode = GET_MODE (x);
41390 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41391 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41393 if (X87_FLOAT_MODE_P (mode))
41395 if (regclass == FP_TOP_SSE_REGS)
41396 return FP_TOP_REG;
41397 else if (regclass == FP_SECOND_SSE_REGS)
41398 return FP_SECOND_REG;
41399 else
41400 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41403 return regclass;
41406 static reg_class_t
41407 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41408 machine_mode mode, secondary_reload_info *sri)
41410 /* Double-word spills from general registers to non-offsettable memory
41411 references (zero-extended addresses) require special handling. */
41412 if (TARGET_64BIT
41413 && MEM_P (x)
41414 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41415 && INTEGER_CLASS_P (rclass)
41416 && !offsettable_memref_p (x))
41418 sri->icode = (in_p
41419 ? CODE_FOR_reload_noff_load
41420 : CODE_FOR_reload_noff_store);
41421 /* Add the cost of moving address to a temporary. */
41422 sri->extra_cost = 1;
41424 return NO_REGS;
41427 /* QImode spills from non-QI registers require an
41428 intermediate register on 32-bit targets. */
41429 if (mode == QImode
41430 && (MAYBE_MASK_CLASS_P (rclass)
41431 || (!TARGET_64BIT && !in_p
41432 && INTEGER_CLASS_P (rclass)
41433 && MAYBE_NON_Q_CLASS_P (rclass))))
41435 int regno;
41437 if (REG_P (x))
41438 regno = REGNO (x);
41439 else
41440 regno = -1;
41442 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41443 regno = true_regnum (x);
41445 /* Return Q_REGS if the operand is in memory. */
41446 if (regno == -1)
41447 return Q_REGS;
41450 /* This condition handles corner case where an expression involving
41451 pointers gets vectorized. We're trying to use the address of a
41452 stack slot as a vector initializer.
41454 (set (reg:V2DI 74 [ vect_cst_.2 ])
41455 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41457 Eventually frame gets turned into sp+offset like this:
41459 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41460 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41461 (const_int 392 [0x188]))))
41463 That later gets turned into:
41465 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41466 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41467 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41469 We'll have the following reload recorded:
41471 Reload 0: reload_in (DI) =
41472 (plus:DI (reg/f:DI 7 sp)
41473 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41474 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41475 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41476 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41477 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41478 reload_reg_rtx: (reg:V2DI 22 xmm1)
41480 Which isn't going to work since SSE instructions can't handle scalar
41481 additions. Returning GENERAL_REGS forces the addition into an integer
41482 register and reload can handle subsequent reloads without problems. */
41484 if (in_p && GET_CODE (x) == PLUS
41485 && SSE_CLASS_P (rclass)
41486 && SCALAR_INT_MODE_P (mode))
41487 return GENERAL_REGS;
41489 return NO_REGS;
41492 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41494 static bool
41495 ix86_class_likely_spilled_p (reg_class_t rclass)
41497 switch (rclass)
41499 case AREG:
41500 case DREG:
41501 case CREG:
41502 case BREG:
41503 case AD_REGS:
41504 case SIREG:
41505 case DIREG:
41506 case SSE_FIRST_REG:
41507 case FP_TOP_REG:
41508 case FP_SECOND_REG:
41509 case BND_REGS:
41510 return true;
41512 default:
41513 break;
41516 return false;
41519 /* If we are copying between general and FP registers, we need a memory
41520 location. The same is true for SSE and MMX registers.
41522 To optimize register_move_cost performance, allow inline variant.
41524 The macro can't work reliably when one of the CLASSES is class containing
41525 registers from multiple units (SSE, MMX, integer). We avoid this by never
41526 combining those units in single alternative in the machine description.
41527 Ensure that this constraint holds to avoid unexpected surprises.
41529 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41530 enforce these sanity checks. */
41532 static inline bool
41533 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41534 machine_mode mode, int strict)
41536 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41537 return false;
41538 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41539 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41540 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41541 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41542 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41543 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41545 gcc_assert (!strict || lra_in_progress);
41546 return true;
41549 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41550 return true;
41552 /* Between mask and general, we have moves no larger than word size. */
41553 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41554 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41555 return true;
41557 /* ??? This is a lie. We do have moves between mmx/general, and for
41558 mmx/sse2. But by saying we need secondary memory we discourage the
41559 register allocator from using the mmx registers unless needed. */
41560 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41561 return true;
41563 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41565 /* SSE1 doesn't have any direct moves from other classes. */
41566 if (!TARGET_SSE2)
41567 return true;
41569 /* If the target says that inter-unit moves are more expensive
41570 than moving through memory, then don't generate them. */
41571 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41572 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41573 return true;
41575 /* Between SSE and general, we have moves no larger than word size. */
41576 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41577 return true;
41580 return false;
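/* For example, a DImode move between GENERAL_REGS and MASK_REGS on a 32-bit
   target needs a memory intermediate (mask<->general moves are at most word
   size), whereas an SImode GENERAL_REGS<->SSE_REGS move does not on an SSE2
   target that permits inter-unit moves.  */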
41583 bool
41584 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41585 machine_mode mode, int strict)
41587 return inline_secondary_memory_needed (class1, class2, mode, strict);
41590 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41592 On the 80386, this is the size of MODE in words,
41593 except in the FP regs, where a single reg is always enough. */
41595 static unsigned char
41596 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41598 if (MAYBE_INTEGER_CLASS_P (rclass))
41600 if (mode == XFmode)
41601 return (TARGET_64BIT ? 2 : 3);
41602 else if (mode == XCmode)
41603 return (TARGET_64BIT ? 4 : 6);
41604 else
41605 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41607 else
41609 if (COMPLEX_MODE_P (mode))
41610 return 2;
41611 else
41612 return 1;
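/* For example, on a 32-bit target DImode in GENERAL_REGS needs two registers
   ((8 + 3) / 4) and XFmode three, while in FLOAT_REGS or SSE_REGS any scalar
   mode needs one register and a complex mode two.  */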
41616 /* Return true if the registers in CLASS cannot represent the change from
41617 modes FROM to TO. */
41619 bool
41620 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41621 enum reg_class regclass)
41623 if (from == to)
41624 return false;
41626 /* x87 registers can't do subreg at all, as all values are reformatted
41627 to extended precision. */
41628 if (MAYBE_FLOAT_CLASS_P (regclass))
41629 return true;
41631 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41633 /* Vector registers do not support QI or HImode loads. If we don't
41634 disallow a change to these modes, reload will assume it's ok to
41635 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41636 the vec_dupv4hi pattern. */
41637 if (GET_MODE_SIZE (from) < 4)
41638 return true;
41641 return false;
41644 /* Return the cost of moving data of mode M between a
41645 register and memory. A value of 2 is the default; this cost is
41646 relative to those in `REGISTER_MOVE_COST'.
41648 This function is used extensively by register_move_cost that is used to
41649 build tables at startup. Make it inline in this case.
41650 When IN is 2, return maximum of in and out move cost.
41652 If moving between registers and memory is more expensive than
41653 between two registers, you should define this macro to express the
41654 relative cost.
41656 Model also increased moving costs of QImode registers in non
41657 Q_REGS classes.  */
41659 static inline int
41660 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41661 int in)
41663 int cost;
41664 if (FLOAT_CLASS_P (regclass))
41666 int index;
41667 switch (mode)
41669 case SFmode:
41670 index = 0;
41671 break;
41672 case DFmode:
41673 index = 1;
41674 break;
41675 case XFmode:
41676 index = 2;
41677 break;
41678 default:
41679 return 100;
41681 if (in == 2)
41682 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41683 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41685 if (SSE_CLASS_P (regclass))
41687 int index;
41688 switch (GET_MODE_SIZE (mode))
41690 case 4:
41691 index = 0;
41692 break;
41693 case 8:
41694 index = 1;
41695 break;
41696 case 16:
41697 index = 2;
41698 break;
41699 default:
41700 return 100;
41702 if (in == 2)
41703 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41704 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41706 if (MMX_CLASS_P (regclass))
41708 int index;
41709 switch (GET_MODE_SIZE (mode))
41711 case 4:
41712 index = 0;
41713 break;
41714 case 8:
41715 index = 1;
41716 break;
41717 default:
41718 return 100;
41720 if (in == 2)
41721 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41722 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41724 switch (GET_MODE_SIZE (mode))
41726 case 1:
41727 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41729 if (!in)
41730 return ix86_cost->int_store[0];
41731 if (TARGET_PARTIAL_REG_DEPENDENCY
41732 && optimize_function_for_speed_p (cfun))
41733 cost = ix86_cost->movzbl_load;
41734 else
41735 cost = ix86_cost->int_load[0];
41736 if (in == 2)
41737 return MAX (cost, ix86_cost->int_store[0]);
41738 return cost;
41740 else
41742 if (in == 2)
41743 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41744 if (in)
41745 return ix86_cost->movzbl_load;
41746 else
41747 return ix86_cost->int_store[0] + 4;
41749 break;
41750 case 2:
41751 if (in == 2)
41752 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41753 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41754 default:
41755 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41756 if (mode == TFmode)
41757 mode = XFmode;
41758 if (in == 2)
41759 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41760 else if (in)
41761 cost = ix86_cost->int_load[2];
41762 else
41763 cost = ix86_cost->int_store[2];
41764 return (cost * (((int) GET_MODE_SIZE (mode)
41765 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
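/* For illustration: on a 32-bit target where XFmode is 12 bytes, a TFmode
   load into GENERAL_REGS is costed as XFmode, i.e. ix86_cost->int_load[2]
   scaled by (12 + 3) / 4 = 3 word-sized pieces.  */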
41769 static int
41770 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41771 bool in)
41773 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41777 /* Return the cost of moving data from a register in class CLASS1 to
41778 one in class CLASS2.
41780 It is not required that the cost always equal 2 when FROM is the same as TO;
41781 on some machines it is expensive to move between registers if they are not
41782 general registers. */
41784 static int
41785 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41786 reg_class_t class2_i)
41788 enum reg_class class1 = (enum reg_class) class1_i;
41789 enum reg_class class2 = (enum reg_class) class2_i;
41791 /* In case we require secondary memory, compute cost of the store followed
41792 by load. In order to avoid bad register allocation choices, we need
41793 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41795 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41797 int cost = 1;
41799 cost += inline_memory_move_cost (mode, class1, 2);
41800 cost += inline_memory_move_cost (mode, class2, 2);
41802 /* In case of copying from general_purpose_register we may emit multiple
41803 stores followed by single load causing memory size mismatch stall.
41804 Count this as arbitrarily high cost of 20. */
41805 if (targetm.class_max_nregs (class1, mode)
41806 > targetm.class_max_nregs (class2, mode))
41807 cost += 20;
41809 /* In the case of FP/MMX moves, the registers actually overlap, and we
41810 have to switch modes in order to treat them differently. */
41811 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41812 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41813 cost += 20;
41815 return cost;
41818 /* Moves between SSE/MMX and integer unit are expensive. */
41819 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41820 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41822 /* ??? By keeping returned value relatively high, we limit the number
41823 of moves between integer and MMX/SSE registers for all targets.
41824 Additionally, high value prevents problem with x86_modes_tieable_p(),
41825 where integer modes in MMX/SSE registers are not tieable
41826 because of missing QImode and HImode moves to, from or between
41827 MMX/SSE registers. */
41828 return MAX (8, ix86_cost->mmxsse_to_integer);
41830 if (MAYBE_FLOAT_CLASS_P (class1))
41831 return ix86_cost->fp_move;
41832 if (MAYBE_SSE_CLASS_P (class1))
41833 return ix86_cost->sse_move;
41834 if (MAYBE_MMX_CLASS_P (class1))
41835 return ix86_cost->mmx_move;
41836 return 2;
41839 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41840 MODE. */
41842 bool
41843 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41845 /* Flags and only flags can only hold CCmode values. */
41846 if (CC_REGNO_P (regno))
41847 return GET_MODE_CLASS (mode) == MODE_CC;
41848 if (GET_MODE_CLASS (mode) == MODE_CC
41849 || GET_MODE_CLASS (mode) == MODE_RANDOM
41850 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41851 return false;
41852 if (STACK_REGNO_P (regno))
41853 return VALID_FP_MODE_P (mode);
41854 if (MASK_REGNO_P (regno))
41855 return (VALID_MASK_REG_MODE (mode)
41856 || (TARGET_AVX512BW
41857 && VALID_MASK_AVX512BW_MODE (mode)));
41858 if (BND_REGNO_P (regno))
41859 return VALID_BND_REG_MODE (mode);
41860 if (SSE_REGNO_P (regno))
41862 /* We implement the move patterns for all vector modes into and
41863 out of SSE registers, even when no operation instructions
41864 are available. */
41866 /* For AVX-512 we allow, regardless of regno:
41867 - XI mode
41868 - any of 512-bit wide vector mode
41869 - any scalar mode. */
41870 if (TARGET_AVX512F
41871 && (mode == XImode
41872 || VALID_AVX512F_REG_MODE (mode)
41873 || VALID_AVX512F_SCALAR_MODE (mode)))
41874 return true;
41876 /* TODO check for QI/HI scalars. */
41877 /* AVX512VL allows SSE regs 16+ for 128/256-bit modes. */
41878 if (TARGET_AVX512VL
41879 && (mode == OImode
41880 || mode == TImode
41881 || VALID_AVX256_REG_MODE (mode)
41882 || VALID_AVX512VL_128_REG_MODE (mode)))
41883 return true;
41885 /* xmm16-xmm31 are only available for AVX-512. */
41886 if (EXT_REX_SSE_REGNO_P (regno))
41887 return false;
41889 /* OImode and AVX modes are available only when AVX is enabled. */
41890 return ((TARGET_AVX
41891 && VALID_AVX256_REG_OR_OI_MODE (mode))
41892 || VALID_SSE_REG_MODE (mode)
41893 || VALID_SSE2_REG_MODE (mode)
41894 || VALID_MMX_REG_MODE (mode)
41895 || VALID_MMX_REG_MODE_3DNOW (mode));
41897 if (MMX_REGNO_P (regno))
41899 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41900 so if the register is available at all, then we can move data of
41901 the given mode into or out of it. */
41902 return (VALID_MMX_REG_MODE (mode)
41903 || VALID_MMX_REG_MODE_3DNOW (mode));
41906 if (mode == QImode)
41908 /* Take care for QImode values - they can be in non-QI regs,
41909 but then they do cause partial register stalls. */
41910 if (ANY_QI_REGNO_P (regno))
41911 return true;
41912 if (!TARGET_PARTIAL_REG_STALL)
41913 return true;
41914 /* LRA checks if the hard register is OK for the given mode.
41915 QImode values can live in non-QI regs, so we allow all
41916 registers here. */
41917 if (lra_in_progress)
41918 return true;
41919 return !can_create_pseudo_p ();
41921 /* We handle both integer and floats in the general purpose registers. */
41922 else if (VALID_INT_MODE_P (mode))
41923 return true;
41924 else if (VALID_FP_MODE_P (mode))
41925 return true;
41926 else if (VALID_DFP_MODE_P (mode))
41927 return true;
41928 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41929 on to use that value in smaller contexts, this can easily force a
41930 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41931 supporting DImode, allow it. */
41932 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41933 return true;
41935 return false;
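/* For example: V4SFmode is allowed in xmm0-xmm15 whenever those registers
   exist, but in xmm16-xmm31 only with AVX512VL; V16SFmode requires AVX512F;
   and QImode in a non-QI register such as esi is allowed only when
   partial-register stalls are not a concern, during LRA, or once no new
   pseudos can be created.  */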
41938 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41939 tieable integer mode. */
41941 static bool
41942 ix86_tieable_integer_mode_p (machine_mode mode)
41944 switch (mode)
41946 case HImode:
41947 case SImode:
41948 return true;
41950 case QImode:
41951 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41953 case DImode:
41954 return TARGET_64BIT;
41956 default:
41957 return false;
41961 /* Return true if MODE1 is accessible in a register that can hold MODE2
41962 without copying. That is, all register classes that can hold MODE2
41963 can also hold MODE1. */
41965 bool
41966 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41968 if (mode1 == mode2)
41969 return true;
41971 if (ix86_tieable_integer_mode_p (mode1)
41972 && ix86_tieable_integer_mode_p (mode2))
41973 return true;
41975 /* MODE2 being XFmode implies fp stack or general regs, which means we
41976 can tie any smaller floating point modes to it. Note that we do not
41977 tie this with TFmode. */
41978 if (mode2 == XFmode)
41979 return mode1 == SFmode || mode1 == DFmode;
41981 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41982 that we can tie it with SFmode. */
41983 if (mode2 == DFmode)
41984 return mode1 == SFmode;
41986 /* If MODE2 is only appropriate for an SSE register, then tie with
41987 any other mode acceptable to SSE registers. */
41988 if (GET_MODE_SIZE (mode2) == 32
41989 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41990 return (GET_MODE_SIZE (mode1) == 32
41991 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41992 if (GET_MODE_SIZE (mode2) == 16
41993 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41994 return (GET_MODE_SIZE (mode1) == 16
41995 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41997 /* If MODE2 is appropriate for an MMX register, then tie
41998 with any other mode acceptable to MMX registers. */
41999 if (GET_MODE_SIZE (mode2) == 8
42000 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42001 return (GET_MODE_SIZE (mode1) == 8
42002 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42004 return false;
42007 /* Return the cost of moving between two registers of mode MODE. */
42009 static int
42010 ix86_set_reg_reg_cost (machine_mode mode)
42012 unsigned int units = UNITS_PER_WORD;
42014 switch (GET_MODE_CLASS (mode))
42016 default:
42017 break;
42019 case MODE_CC:
42020 units = GET_MODE_SIZE (CCmode);
42021 break;
42023 case MODE_FLOAT:
42024 if ((TARGET_SSE && mode == TFmode)
42025 || (TARGET_80387 && mode == XFmode)
42026 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42027 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42028 units = GET_MODE_SIZE (mode);
42029 break;
42031 case MODE_COMPLEX_FLOAT:
42032 if ((TARGET_SSE && mode == TCmode)
42033 || (TARGET_80387 && mode == XCmode)
42034 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42035 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42036 units = GET_MODE_SIZE (mode);
42037 break;
42039 case MODE_VECTOR_INT:
42040 case MODE_VECTOR_FLOAT:
42041 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42042 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42043 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42044 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42045 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42046 units = GET_MODE_SIZE (mode);
42049 /* Return the cost of moving between two registers of mode MODE,
42050 assuming that the move will be in pieces of at most UNITS bytes. */
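      /* Illustrative example: a V4SFmode move on an SSE target gets
	 units == 16 and so costs COSTS_N_INSNS (1), while a DImode move
	 on a 32-bit target keeps units == UNITS_PER_WORD == 4 and costs
	 COSTS_N_INSNS (2).  */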
42051 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42054 /* Compute a (partial) cost for rtx X. Return true if the complete
42055 cost has been computed, and false if subexpressions should be
42056 scanned. In either case, *TOTAL contains the cost result. */
42058 static bool
42059 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42060 bool speed)
42062 rtx mask;
42063 enum rtx_code code = (enum rtx_code) code_i;
42064 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42065 machine_mode mode = GET_MODE (x);
42066 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42068 switch (code)
42070 case SET:
42071 if (register_operand (SET_DEST (x), VOIDmode)
42072 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42074 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42075 return true;
42077 return false;
42079 case CONST_INT:
42080 case CONST:
42081 case LABEL_REF:
42082 case SYMBOL_REF:
42083 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42084 *total = 3;
42085 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42086 *total = 2;
42087 else if (flag_pic && SYMBOLIC_CONST (x)
42088 && !(TARGET_64BIT
42089 && (GET_CODE (x) == LABEL_REF
42090 || (GET_CODE (x) == SYMBOL_REF
42091 && SYMBOL_REF_LOCAL_P (x)))))
42092 *total = 1;
42093 else
42094 *total = 0;
42095 return true;
42097 case CONST_DOUBLE:
42098 if (mode == VOIDmode)
42100 *total = 0;
42101 return true;
42103 switch (standard_80387_constant_p (x))
42105 case 1: /* 0.0 */
42106 *total = 1;
42107 return true;
42108 default: /* Other constants */
42109 *total = 2;
42110 return true;
42111 case 0:
42112 case -1:
42113 break;
42115 if (SSE_FLOAT_MODE_P (mode))
42117 case CONST_VECTOR:
42118 switch (standard_sse_constant_p (x))
42120 case 0:
42121 break;
42122 case 1: /* 0: xor eliminates false dependency */
42123 *total = 0;
42124 return true;
42125 default: /* -1: cmp contains false dependency */
42126 *total = 1;
42127 return true;
42130 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42131 it'll probably end up. Add a penalty for size. */
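      /* Illustrative example: a DFmode constant under 32-bit PIC costs
	 COSTS_N_INSNS (1) + 1 (PIC) + 1 (DFmode size penalty).  */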
42132 *total = (COSTS_N_INSNS (1)
42133 + (flag_pic != 0 && !TARGET_64BIT)
42134 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42135 return true;
42137 case ZERO_EXTEND:
42138 /* The zero extension is often completely free on x86_64, so make
42139 it as cheap as possible. */
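      /* E.g. on x86_64 any write to a 32-bit register implicitly zeroes
	 the upper 32 bits, so an SImode to DImode zero extension usually
	 needs no instruction at all.  */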
42140 if (TARGET_64BIT && mode == DImode
42141 && GET_MODE (XEXP (x, 0)) == SImode)
42142 *total = 1;
42143 else if (TARGET_ZERO_EXTEND_WITH_AND)
42144 *total = cost->add;
42145 else
42146 *total = cost->movzx;
42147 return false;
42149 case SIGN_EXTEND:
42150 *total = cost->movsx;
42151 return false;
42153 case ASHIFT:
42154 if (SCALAR_INT_MODE_P (mode)
42155 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42156 && CONST_INT_P (XEXP (x, 1)))
42158 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42159 if (value == 1)
42161 *total = cost->add;
42162 return false;
42164 if ((value == 2 || value == 3)
42165 && cost->lea <= cost->shift_const)
42167 *total = cost->lea;
42168 return false;
42171 /* FALLTHRU */
42173 case ROTATE:
42174 case ASHIFTRT:
42175 case LSHIFTRT:
42176 case ROTATERT:
42177 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42179 /* ??? Should be SSE vector operation cost. */
42180 /* At least for published AMD latencies, this really is the same
42181 as the latency for a simple fpu operation like fabs. */
42182 /* V*QImode is emulated with 1-11 insns. */
42183 if (mode == V16QImode || mode == V32QImode)
42185 int count = 11;
42186 if (TARGET_XOP && mode == V16QImode)
42188 /* For XOP we use vpshab, which requires a broadcast of the
42189 value to the variable shift insn. For constants this
42190 means a V16QImode constant in memory; even when we can perform
42191 the shift with one insn, set the cost to prefer paddb. */
42192 if (CONSTANT_P (XEXP (x, 1)))
42194 *total = (cost->fabs
42195 + rtx_cost (XEXP (x, 0), code, 0, speed)
42196 + (speed ? 2 : COSTS_N_BYTES (16)));
42197 return true;
42199 count = 3;
42201 else if (TARGET_SSSE3)
42202 count = 7;
42203 *total = cost->fabs * count;
42205 else
42206 *total = cost->fabs;
42208 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42210 if (CONST_INT_P (XEXP (x, 1)))
42212 if (INTVAL (XEXP (x, 1)) > 32)
42213 *total = cost->shift_const + COSTS_N_INSNS (2);
42214 else
42215 *total = cost->shift_const * 2;
42217 else
42219 if (GET_CODE (XEXP (x, 1)) == AND)
42220 *total = cost->shift_var * 2;
42221 else
42222 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42225 else
42227 if (CONST_INT_P (XEXP (x, 1)))
42228 *total = cost->shift_const;
42229 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42230 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42232 /* Return the cost after shift-and truncation. */
42233 *total = cost->shift_var;
42234 return true;
42236 else
42237 *total = cost->shift_var;
42239 return false;
42241 case FMA:
42243 rtx sub;
42245 gcc_assert (FLOAT_MODE_P (mode));
42246 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42248 /* ??? SSE scalar/vector cost should be used here. */
42249 /* ??? Bald assumption that fma has the same cost as fmul. */
42250 *total = cost->fmul;
42251 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42253 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42254 sub = XEXP (x, 0);
42255 if (GET_CODE (sub) == NEG)
42256 sub = XEXP (sub, 0);
42257 *total += rtx_cost (sub, FMA, 0, speed);
42259 sub = XEXP (x, 2);
42260 if (GET_CODE (sub) == NEG)
42261 sub = XEXP (sub, 0);
42262 *total += rtx_cost (sub, FMA, 2, speed);
42263 return true;
42266 case MULT:
42267 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42269 /* ??? SSE scalar cost should be used here. */
42270 *total = cost->fmul;
42271 return false;
42273 else if (X87_FLOAT_MODE_P (mode))
42275 *total = cost->fmul;
42276 return false;
42278 else if (FLOAT_MODE_P (mode))
42280 /* ??? SSE vector cost should be used here. */
42281 *total = cost->fmul;
42282 return false;
42284 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42286 /* V*QImode is emulated with 7-13 insns. */
42287 if (mode == V16QImode || mode == V32QImode)
42289 int extra = 11;
42290 if (TARGET_XOP && mode == V16QImode)
42291 extra = 5;
42292 else if (TARGET_SSSE3)
42293 extra = 6;
42294 *total = cost->fmul * 2 + cost->fabs * extra;
42296 /* V*DImode is emulated with 5-8 insns. */
42297 else if (mode == V2DImode || mode == V4DImode)
42299 if (TARGET_XOP && mode == V2DImode)
42300 *total = cost->fmul * 2 + cost->fabs * 3;
42301 else
42302 *total = cost->fmul * 3 + cost->fabs * 5;
42304 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42305 insns, including two PMULUDQ. */
42306 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42307 *total = cost->fmul * 2 + cost->fabs * 5;
42308 else
42309 *total = cost->fmul;
42310 return false;
42312 else
42314 rtx op0 = XEXP (x, 0);
42315 rtx op1 = XEXP (x, 1);
42316 int nbits;
42317 if (CONST_INT_P (XEXP (x, 1)))
42319 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42320 for (nbits = 0; value != 0; value &= value - 1)
42321 nbits++;
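	      /* The loop above is Kernighan's population count: nbits is the
		 number of 1 bits in the constant, e.g. a multiply by 5
		 (binary 101) gives nbits == 2.  */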
42323 else
42324 /* This is arbitrary. */
42325 nbits = 7;
42327 /* Compute costs correctly for widening multiplication. */
42328 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42329 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42330 == GET_MODE_SIZE (mode))
42332 int is_mulwiden = 0;
42333 machine_mode inner_mode = GET_MODE (op0);
42335 if (GET_CODE (op0) == GET_CODE (op1))
42336 is_mulwiden = 1, op1 = XEXP (op1, 0);
42337 else if (CONST_INT_P (op1))
42339 if (GET_CODE (op0) == SIGN_EXTEND)
42340 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42341 == INTVAL (op1);
42342 else
42343 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42346 if (is_mulwiden)
42347 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42350 *total = (cost->mult_init[MODE_INDEX (mode)]
42351 + nbits * cost->mult_bit
42352 + rtx_cost (op0, outer_code, opno, speed)
42353 + rtx_cost (op1, outer_code, opno, speed));
42355 return true;
42358 case DIV:
42359 case UDIV:
42360 case MOD:
42361 case UMOD:
42362 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42363 /* ??? SSE cost should be used here. */
42364 *total = cost->fdiv;
42365 else if (X87_FLOAT_MODE_P (mode))
42366 *total = cost->fdiv;
42367 else if (FLOAT_MODE_P (mode))
42368 /* ??? SSE vector cost should be used here. */
42369 *total = cost->fdiv;
42370 else
42371 *total = cost->divide[MODE_INDEX (mode)];
42372 return false;
42374 case PLUS:
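      /* The cases below recognize address-shaped expressions such as
	 (plus (plus (mult reg {2,4,8}) reg) constant) that a single lea
	 instruction can compute, and price them as one lea plus the cost
	 of the operands.  */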
42375 if (GET_MODE_CLASS (mode) == MODE_INT
42376 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42378 if (GET_CODE (XEXP (x, 0)) == PLUS
42379 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42380 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42381 && CONSTANT_P (XEXP (x, 1)))
42383 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42384 if (val == 2 || val == 4 || val == 8)
42386 *total = cost->lea;
42387 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42388 outer_code, opno, speed);
42389 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42390 outer_code, opno, speed);
42391 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42392 return true;
42395 else if (GET_CODE (XEXP (x, 0)) == MULT
42396 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42398 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42399 if (val == 2 || val == 4 || val == 8)
42401 *total = cost->lea;
42402 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42403 outer_code, opno, speed);
42404 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42405 return true;
42408 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42410 *total = cost->lea;
42411 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42412 outer_code, opno, speed);
42413 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42414 outer_code, opno, speed);
42415 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42416 return true;
42419 /* FALLTHRU */
42421 case MINUS:
42422 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42424 /* ??? SSE cost should be used here. */
42425 *total = cost->fadd;
42426 return false;
42428 else if (X87_FLOAT_MODE_P (mode))
42430 *total = cost->fadd;
42431 return false;
42433 else if (FLOAT_MODE_P (mode))
42435 /* ??? SSE vector cost should be used here. */
42436 *total = cost->fadd;
42437 return false;
42439 /* FALLTHRU */
42441 case AND:
42442 case IOR:
42443 case XOR:
42444 if (GET_MODE_CLASS (mode) == MODE_INT
42445 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42447 *total = (cost->add * 2
42448 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42449 << (GET_MODE (XEXP (x, 0)) != DImode))
42450 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42451 << (GET_MODE (XEXP (x, 1)) != DImode)));
42452 return true;
42454 /* FALLTHRU */
42456 case NEG:
42457 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42459 /* ??? SSE cost should be used here. */
42460 *total = cost->fchs;
42461 return false;
42463 else if (X87_FLOAT_MODE_P (mode))
42465 *total = cost->fchs;
42466 return false;
42468 else if (FLOAT_MODE_P (mode))
42470 /* ??? SSE vector cost should be used here. */
42471 *total = cost->fchs;
42472 return false;
42474 /* FALLTHRU */
42476 case NOT:
42477 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42479 /* ??? Should be SSE vector operation cost. */
42480 /* At least for published AMD latencies, this really is the same
42481 as the latency for a simple fpu operation like fabs. */
42482 *total = cost->fabs;
42484 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42485 *total = cost->add * 2;
42486 else
42487 *total = cost->add;
42488 return false;
42490 case COMPARE:
42491 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42492 && XEXP (XEXP (x, 0), 1) == const1_rtx
42493 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42494 && XEXP (x, 1) == const0_rtx)
42496 /* This kind of construct is implemented using test[bwl].
42497 Treat it as if we had an AND. */
42498 *total = (cost->add
42499 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42500 + rtx_cost (const1_rtx, outer_code, opno, speed));
42501 return true;
42503 return false;
42505 case FLOAT_EXTEND:
42506 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42507 *total = 0;
42508 return false;
42510 case ABS:
42511 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42512 /* ??? SSE cost should be used here. */
42513 *total = cost->fabs;
42514 else if (X87_FLOAT_MODE_P (mode))
42515 *total = cost->fabs;
42516 else if (FLOAT_MODE_P (mode))
42517 /* ??? SSE vector cost should be used here. */
42518 *total = cost->fabs;
42519 return false;
42521 case SQRT:
42522 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42523 /* ??? SSE cost should be used here. */
42524 *total = cost->fsqrt;
42525 else if (X87_FLOAT_MODE_P (mode))
42526 *total = cost->fsqrt;
42527 else if (FLOAT_MODE_P (mode))
42528 /* ??? SSE vector cost should be used here. */
42529 *total = cost->fsqrt;
42530 return false;
42532 case UNSPEC:
42533 if (XINT (x, 1) == UNSPEC_TP)
42534 *total = 0;
42535 return false;
42537 case VEC_SELECT:
42538 case VEC_CONCAT:
42539 case VEC_DUPLICATE:
42540 /* ??? Assume all of these vector manipulation patterns are
42541 recognizable, in which case they all pretty much have the
42542 same cost. */
42543 *total = cost->fabs;
42544 return true;
42545 case VEC_MERGE:
42546 mask = XEXP (x, 2);
42547 /* This is a masked instruction; assume the same cost
42548 as the nonmasked variant. */
42549 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42550 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42551 else
42552 *total = cost->fabs;
42553 return true;
42555 default:
42556 return false;
42560 #if TARGET_MACHO
42562 static int current_machopic_label_num;
42564 /* Given a symbol name and its associated stub, write out the
42565 definition of the stub. */
42567 void
42568 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42570 unsigned int length;
42571 char *binder_name, *symbol_name, lazy_ptr_name[32];
42572 int label = ++current_machopic_label_num;
42574 /* For 64-bit we shouldn't get here. */
42575 gcc_assert (!TARGET_64BIT);
42577 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42578 symb = targetm.strip_name_encoding (symb);
42580 length = strlen (stub);
42581 binder_name = XALLOCAVEC (char, length + 32);
42582 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42584 length = strlen (symb);
42585 symbol_name = XALLOCAVEC (char, length + 32);
42586 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42588 sprintf (lazy_ptr_name, "L%d$lz", label);
42590 if (MACHOPIC_ATT_STUB)
42591 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42592 else if (MACHOPIC_PURE)
42593 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42594 else
42595 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42597 fprintf (file, "%s:\n", stub);
42598 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42600 if (MACHOPIC_ATT_STUB)
42602 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42604 else if (MACHOPIC_PURE)
42606 /* PIC stub. */
42607 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42608 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42609 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42610 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42611 label, lazy_ptr_name, label);
42612 fprintf (file, "\tjmp\t*%%ecx\n");
42614 else
42615 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42617 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42618 it needs no stub-binding-helper. */
42619 if (MACHOPIC_ATT_STUB)
42620 return;
42622 fprintf (file, "%s:\n", binder_name);
42624 if (MACHOPIC_PURE)
42626 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42627 fprintf (file, "\tpushl\t%%ecx\n");
42629 else
42630 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42632 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42634 /* N.B. Keep the correspondence of these
42635 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42636 old-pic/new-pic/non-pic stubs; altering this will break
42637 compatibility with existing dylibs. */
42638 if (MACHOPIC_PURE)
42640 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42641 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42643 else
42644 /* 16-byte -mdynamic-no-pic stub. */
42645 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42647 fprintf (file, "%s:\n", lazy_ptr_name);
42648 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42649 fprintf (file, ASM_LONG "%s\n", binder_name);
42651 #endif /* TARGET_MACHO */
42653 /* Order the registers for register allocator. */
42655 void
42656 x86_order_regs_for_local_alloc (void)
42658 int pos = 0;
42659 int i;
42661 /* First allocate the local general purpose registers. */
42662 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42663 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42664 reg_alloc_order [pos++] = i;
42666 /* Global general purpose registers. */
42667 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42668 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42669 reg_alloc_order [pos++] = i;
42671 /* x87 registers come first in case we are doing FP math
42672 using them. */
42673 if (!TARGET_SSE_MATH)
42674 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42675 reg_alloc_order [pos++] = i;
42677 /* SSE registers. */
42678 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42679 reg_alloc_order [pos++] = i;
42680 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42681 reg_alloc_order [pos++] = i;
42683 /* Extended REX SSE registers. */
42684 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42685 reg_alloc_order [pos++] = i;
42687 /* Mask registers. */
42688 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42689 reg_alloc_order [pos++] = i;
42691 /* MPX bound registers. */
42692 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42693 reg_alloc_order [pos++] = i;
42695 /* x87 registers. */
42696 if (TARGET_SSE_MATH)
42697 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42698 reg_alloc_order [pos++] = i;
42700 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42701 reg_alloc_order [pos++] = i;
42703 /* Initialize the rest of the array, as we do not allocate some
42704 registers at all. */
42705 while (pos < FIRST_PSEUDO_REGISTER)
42706 reg_alloc_order [pos++] = 0;
42709 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42710 in struct attribute_spec handler. */
42711 static tree
42712 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42713 tree args,
42714 int,
42715 bool *no_add_attrs)
42717 if (TREE_CODE (*node) != FUNCTION_TYPE
42718 && TREE_CODE (*node) != METHOD_TYPE
42719 && TREE_CODE (*node) != FIELD_DECL
42720 && TREE_CODE (*node) != TYPE_DECL)
42722 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42723 name);
42724 *no_add_attrs = true;
42725 return NULL_TREE;
42727 if (TARGET_64BIT)
42729 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42730 name);
42731 *no_add_attrs = true;
42732 return NULL_TREE;
42734 if (is_attribute_p ("callee_pop_aggregate_return", name))
42736 tree cst;
42738 cst = TREE_VALUE (args);
42739 if (TREE_CODE (cst) != INTEGER_CST)
42741 warning (OPT_Wattributes,
42742 "%qE attribute requires an integer constant argument",
42743 name);
42744 *no_add_attrs = true;
42746 else if (compare_tree_int (cst, 0) != 0
42747 && compare_tree_int (cst, 1) != 0)
42749 warning (OPT_Wattributes,
42750 "argument to %qE attribute is neither zero, nor one",
42751 name);
42752 *no_add_attrs = true;
42755 return NULL_TREE;
42758 return NULL_TREE;
42761 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42762 struct attribute_spec.handler. */
42763 static tree
42764 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42765 bool *no_add_attrs)
42767 if (TREE_CODE (*node) != FUNCTION_TYPE
42768 && TREE_CODE (*node) != METHOD_TYPE
42769 && TREE_CODE (*node) != FIELD_DECL
42770 && TREE_CODE (*node) != TYPE_DECL)
42772 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42773 name);
42774 *no_add_attrs = true;
42775 return NULL_TREE;
42778 /* Can combine regparm with all attributes but fastcall. */
42779 if (is_attribute_p ("ms_abi", name))
42781 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42783 error ("ms_abi and sysv_abi attributes are not compatible");
42786 return NULL_TREE;
42788 else if (is_attribute_p ("sysv_abi", name))
42790 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42792 error ("ms_abi and sysv_abi attributes are not compatible");
42795 return NULL_TREE;
42798 return NULL_TREE;
42801 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42802 struct attribute_spec.handler. */
42803 static tree
42804 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42805 bool *no_add_attrs)
42807 tree *type = NULL;
42808 if (DECL_P (*node))
42810 if (TREE_CODE (*node) == TYPE_DECL)
42811 type = &TREE_TYPE (*node);
42813 else
42814 type = node;
42816 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42818 warning (OPT_Wattributes, "%qE attribute ignored",
42819 name);
42820 *no_add_attrs = true;
42823 else if ((is_attribute_p ("ms_struct", name)
42824 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42825 || ((is_attribute_p ("gcc_struct", name)
42826 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42828 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42829 name);
42830 *no_add_attrs = true;
42833 return NULL_TREE;
42836 static tree
42837 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42838 bool *no_add_attrs)
42840 if (TREE_CODE (*node) != FUNCTION_DECL)
42842 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42843 name);
42844 *no_add_attrs = true;
42846 return NULL_TREE;
42849 static bool
42850 ix86_ms_bitfield_layout_p (const_tree record_type)
42852 return ((TARGET_MS_BITFIELD_LAYOUT
42853 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42854 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42857 /* Returns an expression indicating where the this parameter is
42858 located on entry to the FUNCTION. */
42860 static rtx
42861 x86_this_parameter (tree function)
42863 tree type = TREE_TYPE (function);
42864 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42865 int nregs;
42867 if (TARGET_64BIT)
42869 const int *parm_regs;
42871 if (ix86_function_type_abi (type) == MS_ABI)
42872 parm_regs = x86_64_ms_abi_int_parameter_registers;
42873 else
42874 parm_regs = x86_64_int_parameter_registers;
42875 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42878 nregs = ix86_function_regparm (type, function);
42880 if (nregs > 0 && !stdarg_p (type))
42882 int regno;
42883 unsigned int ccvt = ix86_get_callcvt (type);
42885 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42886 regno = aggr ? DX_REG : CX_REG;
42887 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42889 regno = CX_REG;
42890 if (aggr)
42891 return gen_rtx_MEM (SImode,
42892 plus_constant (Pmode, stack_pointer_rtx, 4));
42894 else
42896 regno = AX_REG;
42897 if (aggr)
42899 regno = DX_REG;
42900 if (nregs == 1)
42901 return gen_rtx_MEM (SImode,
42902 plus_constant (Pmode,
42903 stack_pointer_rtx, 4));
42906 return gen_rtx_REG (SImode, regno);
42909 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42910 aggr ? 8 : 4));
42913 /* Determine whether x86_output_mi_thunk can succeed. */
42915 static bool
42916 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42917 const_tree function)
42919 /* 64-bit can handle anything. */
42920 if (TARGET_64BIT)
42921 return true;
42923 /* For 32-bit, everything's fine if we have one free register. */
42924 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42925 return true;
42927 /* Need a free register for vcall_offset. */
42928 if (vcall_offset)
42929 return false;
42931 /* Need a free register for GOT references. */
42932 if (flag_pic && !targetm.binds_local_p (function))
42933 return false;
42935 /* Otherwise ok. */
42936 return true;
42939 /* Output the assembler code for a thunk function. THUNK_DECL is the
42940 declaration for the thunk function itself, FUNCTION is the decl for
42941 the target function. DELTA is an immediate constant offset to be
42942 added to THIS. If VCALL_OFFSET is nonzero, the word at
42943 *(*this + vcall_offset) should be added to THIS. */
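   /* A sketch of what the emitted thunk computes (illustrative, not the
      literal assembly):
	this += DELTA;
	if (VCALL_OFFSET != 0)
	  this += *(*this + VCALL_OFFSET);
	tail call FUNCTION;  */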
42945 static void
42946 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42947 HOST_WIDE_INT vcall_offset, tree function)
42949 rtx this_param = x86_this_parameter (function);
42950 rtx this_reg, tmp, fnaddr;
42951 unsigned int tmp_regno;
42952 rtx_insn *insn;
42954 if (TARGET_64BIT)
42955 tmp_regno = R10_REG;
42956 else
42958 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42959 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42960 tmp_regno = AX_REG;
42961 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42962 tmp_regno = DX_REG;
42963 else
42964 tmp_regno = CX_REG;
42967 emit_note (NOTE_INSN_PROLOGUE_END);
42969 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42970 pull it in now and let DELTA benefit. */
42971 if (REG_P (this_param))
42972 this_reg = this_param;
42973 else if (vcall_offset)
42975 /* Put the this parameter into %eax. */
42976 this_reg = gen_rtx_REG (Pmode, AX_REG);
42977 emit_move_insn (this_reg, this_param);
42979 else
42980 this_reg = NULL_RTX;
42982 /* Adjust the this parameter by a fixed constant. */
42983 if (delta)
42985 rtx delta_rtx = GEN_INT (delta);
42986 rtx delta_dst = this_reg ? this_reg : this_param;
42988 if (TARGET_64BIT)
42990 if (!x86_64_general_operand (delta_rtx, Pmode))
42992 tmp = gen_rtx_REG (Pmode, tmp_regno);
42993 emit_move_insn (tmp, delta_rtx);
42994 delta_rtx = tmp;
42998 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43001 /* Adjust the this parameter by a value stored in the vtable. */
43002 if (vcall_offset)
43004 rtx vcall_addr, vcall_mem, this_mem;
43006 tmp = gen_rtx_REG (Pmode, tmp_regno);
43008 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43009 if (Pmode != ptr_mode)
43010 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43011 emit_move_insn (tmp, this_mem);
43013 /* Adjust the this parameter. */
43014 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43015 if (TARGET_64BIT
43016 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43018 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43019 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43020 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43023 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43024 if (Pmode != ptr_mode)
43025 emit_insn (gen_addsi_1_zext (this_reg,
43026 gen_rtx_REG (ptr_mode,
43027 REGNO (this_reg)),
43028 vcall_mem));
43029 else
43030 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43033 /* If necessary, drop THIS back to its stack slot. */
43034 if (this_reg && this_reg != this_param)
43035 emit_move_insn (this_param, this_reg);
43037 fnaddr = XEXP (DECL_RTL (function), 0);
43038 if (TARGET_64BIT)
43040 if (!flag_pic || targetm.binds_local_p (function)
43041 || TARGET_PECOFF)
43043 else
43045 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43046 tmp = gen_rtx_CONST (Pmode, tmp);
43047 fnaddr = gen_const_mem (Pmode, tmp);
43050 else
43052 if (!flag_pic || targetm.binds_local_p (function))
43054 #if TARGET_MACHO
43055 else if (TARGET_MACHO)
43057 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43058 fnaddr = XEXP (fnaddr, 0);
43060 #endif /* TARGET_MACHO */
43061 else
43063 tmp = gen_rtx_REG (Pmode, CX_REG);
43064 output_set_got (tmp, NULL_RTX);
43066 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43067 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43068 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43069 fnaddr = gen_const_mem (Pmode, fnaddr);
43073 /* Our sibling call patterns do not allow memories, because we have no
43074 predicate that can distinguish between frame and non-frame memory.
43075 For our purposes here, we can get away with (ab)using a jump pattern,
43076 because we're going to do no optimization. */
43077 if (MEM_P (fnaddr))
43079 if (sibcall_insn_operand (fnaddr, word_mode))
43081 fnaddr = XEXP (DECL_RTL (function), 0);
43082 tmp = gen_rtx_MEM (QImode, fnaddr);
43083 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43084 tmp = emit_call_insn (tmp);
43085 SIBLING_CALL_P (tmp) = 1;
43087 else
43088 emit_jump_insn (gen_indirect_jump (fnaddr));
43090 else
43092 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43094 // CM_LARGE_PIC always uses a pseudo PIC register which is
43095 // uninitialized. Since FUNCTION is local and calling it
43096 // doesn't go through the PLT, we use scratch register %r11 as
43097 // the PIC register and initialize it here.
43098 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43099 ix86_init_large_pic_reg (tmp_regno);
43100 fnaddr = legitimize_pic_address (fnaddr,
43101 gen_rtx_REG (Pmode, tmp_regno));
43104 if (!sibcall_insn_operand (fnaddr, word_mode))
43106 tmp = gen_rtx_REG (word_mode, tmp_regno);
43107 if (GET_MODE (fnaddr) != word_mode)
43108 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43109 emit_move_insn (tmp, fnaddr);
43110 fnaddr = tmp;
43113 tmp = gen_rtx_MEM (QImode, fnaddr);
43114 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43115 tmp = emit_call_insn (tmp);
43116 SIBLING_CALL_P (tmp) = 1;
43118 emit_barrier ();
43120 /* Emit just enough of rest_of_compilation to get the insns emitted.
43121 Note that use_thunk calls assemble_start_function et al. */
43122 insn = get_insns ();
43123 shorten_branches (insn);
43124 final_start_function (insn, file, 1);
43125 final (insn, file, 1);
43126 final_end_function ();
43129 static void
43130 x86_file_start (void)
43132 default_file_start ();
43133 if (TARGET_16BIT)
43134 fputs ("\t.code16gcc\n", asm_out_file);
43135 #if TARGET_MACHO
43136 darwin_file_start ();
43137 #endif
43138 if (X86_FILE_START_VERSION_DIRECTIVE)
43139 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43140 if (X86_FILE_START_FLTUSED)
43141 fputs ("\t.global\t__fltused\n", asm_out_file);
43142 if (ix86_asm_dialect == ASM_INTEL)
43143 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43146 int
43147 x86_field_alignment (tree field, int computed)
43149 machine_mode mode;
43150 tree type = TREE_TYPE (field);
43152 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43153 return computed;
43154 mode = TYPE_MODE (strip_array_types (type));
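  /* Note (i386 psABI behavior): without -malign-double, a double or
     long long field in a struct is limited to 32-bit alignment by the
     check below.  */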
43155 if (mode == DFmode || mode == DCmode
43156 || GET_MODE_CLASS (mode) == MODE_INT
43157 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43158 return MIN (32, computed);
43159 return computed;
43162 /* Print call to TARGET to FILE. */
43164 static void
43165 x86_print_call_or_nop (FILE *file, const char *target)
43167 if (flag_nop_mcount)
43168 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43169 else
43170 fprintf (file, "1:\tcall\t%s\n", target);
43173 /* Output assembler code to FILE to increment profiler label # LABELNO
43174 for profiling a function entry. */
43175 void
43176 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43178 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43179 : MCOUNT_NAME);
43180 if (TARGET_64BIT)
43182 #ifndef NO_PROFILE_COUNTERS
43183 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43184 #endif
43186 if (!TARGET_PECOFF && flag_pic)
43187 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43188 else
43189 x86_print_call_or_nop (file, mcount_name);
43191 else if (flag_pic)
43193 #ifndef NO_PROFILE_COUNTERS
43194 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43195 LPREFIX, labelno);
43196 #endif
43197 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43199 else
43201 #ifndef NO_PROFILE_COUNTERS
43202 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43203 LPREFIX, labelno);
43204 #endif
43205 x86_print_call_or_nop (file, mcount_name);
43208 if (flag_record_mcount)
43210 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43211 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43212 fprintf (file, "\t.previous\n");
43216 /* We don't have exact information about insn sizes, but we may quite
43217 safely assume that we are informed about all 1-byte insns and about
43218 memory address sizes. This is enough to eliminate unnecessary padding
43219 in 99% of cases. */
43221 static int
43222 min_insn_size (rtx_insn *insn)
43224 int l = 0, len;
43226 if (!INSN_P (insn) || !active_insn_p (insn))
43227 return 0;
43229 /* Discard alignments we've emitted and jump instructions. */
43230 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43231 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43232 return 0;
43234 /* Important case - calls are always 5 bytes.
43235 It is common to have many calls in a row. */
43236 if (CALL_P (insn)
43237 && symbolic_reference_mentioned_p (PATTERN (insn))
43238 && !SIBLING_CALL_P (insn))
43239 return 5;
43240 len = get_attr_length (insn);
43241 if (len <= 1)
43242 return 1;
43244 /* For normal instructions we rely on get_attr_length being exact,
43245 with a few exceptions. */
43246 if (!JUMP_P (insn))
43248 enum attr_type type = get_attr_type (insn);
43250 switch (type)
43252 case TYPE_MULTI:
43253 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43254 || asm_noperands (PATTERN (insn)) >= 0)
43255 return 0;
43256 break;
43257 case TYPE_OTHER:
43258 case TYPE_FCMP:
43259 break;
43260 default:
43261 /* Otherwise trust get_attr_length. */
43262 return len;
43265 l = get_attr_length_address (insn);
43266 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43267 l = 4;
43269 if (l)
43270 return 1+l;
43271 else
43272 return 2;
43275 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43277 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
43278 window. */
43280 static void
43281 ix86_avoid_jump_mispredicts (void)
43283 rtx_insn *insn, *start = get_insns ();
43284 int nbytes = 0, njumps = 0;
43285 bool isjump = false;
43287 /* Look for all minimal intervals of instructions containing 4 jumps.
43288 The intervals are bounded by START and INSN. NBYTES is the total
43289 size of instructions in the interval including INSN and not including
43290 START. When NBYTES is smaller than 16 bytes, it is possible
43291 that the end of START and INSN end up in the same 16-byte page.
43293 The smallest offset in the page at which INSN can start is the case where
43294 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43295 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43297 Don't consider an asm goto as a jump: while it can contain a jump, it
43298 doesn't have to, since control transfer to its label(s) can be performed
43299 through other means; also, we estimate the minimum length of all asm stmts as 0. */
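  /* Illustrative example: if START is a jump, the interval already holds
     three more jumps and nbytes is 14 including INSN, the code below pads
     before INSN by 15 - 14 + sizeof (INSN) bytes so that the four jumps
     can no longer share a single 16-byte window.  */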
43300 for (insn = start; insn; insn = NEXT_INSN (insn))
43302 int min_size;
43304 if (LABEL_P (insn))
43306 int align = label_to_alignment (insn);
43307 int max_skip = label_to_max_skip (insn);
43309 if (max_skip > 15)
43310 max_skip = 15;
43311 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43312 already in the current 16 byte page, because otherwise
43313 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43314 bytes to reach 16 byte boundary. */
43315 if (align <= 0
43316 || (align <= 3 && max_skip != (1 << align) - 1))
43317 max_skip = 0;
43318 if (dump_file)
43319 fprintf (dump_file, "Label %i with max_skip %i\n",
43320 INSN_UID (insn), max_skip);
43321 if (max_skip)
43323 while (nbytes + max_skip >= 16)
43325 start = NEXT_INSN (start);
43326 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43327 || CALL_P (start))
43328 njumps--, isjump = true;
43329 else
43330 isjump = false;
43331 nbytes -= min_insn_size (start);
43334 continue;
43337 min_size = min_insn_size (insn);
43338 nbytes += min_size;
43339 if (dump_file)
43340 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43341 INSN_UID (insn), min_size);
43342 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43343 || CALL_P (insn))
43344 njumps++;
43345 else
43346 continue;
43348 while (njumps > 3)
43350 start = NEXT_INSN (start);
43351 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43352 || CALL_P (start))
43353 njumps--, isjump = true;
43354 else
43355 isjump = false;
43356 nbytes -= min_insn_size (start);
43358 gcc_assert (njumps >= 0);
43359 if (dump_file)
43360 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43361 INSN_UID (start), INSN_UID (insn), nbytes);
43363 if (njumps == 3 && isjump && nbytes < 16)
43365 int padsize = 15 - nbytes + min_insn_size (insn);
43367 if (dump_file)
43368 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43369 INSN_UID (insn), padsize);
43370 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43374 #endif
43376 /* AMD Athlon works faster
43377 when RET is not the destination of a conditional jump or directly preceded
43378 by another jump instruction. We avoid the penalty by inserting a NOP just
43379 before the RET instruction in such cases. */
43380 static void
43381 ix86_pad_returns (void)
43383 edge e;
43384 edge_iterator ei;
43386 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43388 basic_block bb = e->src;
43389 rtx_insn *ret = BB_END (bb);
43390 rtx_insn *prev;
43391 bool replace = false;
43393 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43394 || optimize_bb_for_size_p (bb))
43395 continue;
43396 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43397 if (active_insn_p (prev) || LABEL_P (prev))
43398 break;
43399 if (prev && LABEL_P (prev))
43401 edge e;
43402 edge_iterator ei;
43404 FOR_EACH_EDGE (e, ei, bb->preds)
43405 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43406 && !(e->flags & EDGE_FALLTHRU))
43408 replace = true;
43409 break;
43412 if (!replace)
43414 prev = prev_active_insn (ret);
43415 if (prev
43416 && ((JUMP_P (prev) && any_condjump_p (prev))
43417 || CALL_P (prev)))
43418 replace = true;
43419 /* Empty functions get branch mispredict even when
43420 the jump destination is not visible to us. */
43421 if (!prev && !optimize_function_for_size_p (cfun))
43422 replace = true;
43424 if (replace)
43426 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43427 delete_insn (ret);
43432 /* Count the minimum number of instructions in BB. Return 4 if the
43433 number of instructions >= 4. */
43435 static int
43436 ix86_count_insn_bb (basic_block bb)
43438 rtx_insn *insn;
43439 int insn_count = 0;
43441 /* Count number of instructions in this block. Return 4 if the number
43442 of instructions >= 4. */
43443 FOR_BB_INSNS (bb, insn)
43445 /* This can only happen in exit blocks. */
43446 if (JUMP_P (insn)
43447 && ANY_RETURN_P (PATTERN (insn)))
43448 break;
43450 if (NONDEBUG_INSN_P (insn)
43451 && GET_CODE (PATTERN (insn)) != USE
43452 && GET_CODE (PATTERN (insn)) != CLOBBER)
43454 insn_count++;
43455 if (insn_count >= 4)
43456 return insn_count;
43460 return insn_count;
43464 /* Count the minimum number of instructions in a code path ending in BB.
43465 Return 4 if the number of instructions >= 4. */
43467 static int
43468 ix86_count_insn (basic_block bb)
43470 edge e;
43471 edge_iterator ei;
43472 int min_prev_count;
43474 /* Only bother counting instructions along paths with no
43475 more than 2 basic blocks between entry and exit. Given
43476 that BB has an edge to exit, determine if a predecessor
43477 of BB has an edge from entry. If so, compute the number
43478 of instructions in the predecessor block. If there
43479 happen to be multiple such blocks, compute the minimum. */
43480 min_prev_count = 4;
43481 FOR_EACH_EDGE (e, ei, bb->preds)
43483 edge prev_e;
43484 edge_iterator prev_ei;
43486 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43488 min_prev_count = 0;
43489 break;
43491 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43493 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43495 int count = ix86_count_insn_bb (e->src);
43496 if (count < min_prev_count)
43497 min_prev_count = count;
43498 break;
43503 if (min_prev_count < 4)
43504 min_prev_count += ix86_count_insn_bb (bb);
43506 return min_prev_count;
43509 /* Pad short function to 4 instructions. */
43511 static void
43512 ix86_pad_short_function (void)
43514 edge e;
43515 edge_iterator ei;
43517 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43519 rtx_insn *ret = BB_END (e->src);
43520 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43522 int insn_count = ix86_count_insn (e->src);
43524 /* Pad short function. */
43525 if (insn_count < 4)
43527 rtx_insn *insn = ret;
43529 /* Find epilogue. */
43530 while (insn
43531 && (!NOTE_P (insn)
43532 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43533 insn = PREV_INSN (insn);
43535 if (!insn)
43536 insn = ret;
43538 /* Two NOPs count as one instruction. */
43539 insn_count = 2 * (4 - insn_count);
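	  /* E.g. a function body with only two counted insns needs two
	     more, so insn_count becomes 4 here and four NOPs are
	     emitted below.  */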
43540 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43546 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43547 the epilogue, the Windows system unwinder will apply epilogue logic and
43548 produce incorrect offsets. This can be avoided by adding a nop between
43549 the last insn that can throw and the first insn of the epilogue. */
43551 static void
43552 ix86_seh_fixup_eh_fallthru (void)
43554 edge e;
43555 edge_iterator ei;
43557 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43559 rtx_insn *insn, *next;
43561 /* Find the beginning of the epilogue. */
43562 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43563 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43564 break;
43565 if (insn == NULL)
43566 continue;
43568 /* We only care about preceding insns that can throw. */
43569 insn = prev_active_insn (insn);
43570 if (insn == NULL || !can_throw_internal (insn))
43571 continue;
43573 /* Do not separate calls from their debug information. */
43574 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43575 if (NOTE_P (next)
43576 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43577 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43578 insn = next;
43579 else
43580 break;
43582 emit_insn_after (gen_nops (const1_rtx), insn);
43586 /* Implement machine specific optimizations. We implement padding of returns
43587 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43588 static void
43589 ix86_reorg (void)
43591 /* We are freeing block_for_insn in the toplev to keep compatibility
43592 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43593 compute_bb_for_insn ();
43595 if (TARGET_SEH && current_function_has_exception_handlers ())
43596 ix86_seh_fixup_eh_fallthru ();
43598 if (optimize && optimize_function_for_speed_p (cfun))
43600 if (TARGET_PAD_SHORT_FUNCTION)
43601 ix86_pad_short_function ();
43602 else if (TARGET_PAD_RETURNS)
43603 ix86_pad_returns ();
43604 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43605 if (TARGET_FOUR_JUMP_LIMIT)
43606 ix86_avoid_jump_mispredicts ();
43607 #endif
43611 /* Return nonzero when a QImode register that must be represented via a REX
43612 prefix is used. */
43613 bool
43614 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43616 int i;
43617 extract_insn_cached (insn);
43618 for (i = 0; i < recog_data.n_operands; i++)
43619 if (GENERAL_REG_P (recog_data.operand[i])
43620 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43621 return true;
43622 return false;
43625 /* Return true when INSN mentions a register that must be encoded using a REX
43626 prefix. */
43627 bool
43628 x86_extended_reg_mentioned_p (rtx insn)
43630 subrtx_iterator::array_type array;
43631 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43633 const_rtx x = *iter;
43634 if (REG_P (x)
43635 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43636 return true;
43638 return false;
43641 /* If profitable, negate (without causing overflow) integer constant
43642 of mode MODE at location LOC. Return true in this case. */
43643 bool
43644 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43646 HOST_WIDE_INT val;
43648 if (!CONST_INT_P (*loc))
43649 return false;
43651 switch (mode)
43653 case DImode:
43654 /* DImode x86_64 constants must fit in 32 bits. */
43655 gcc_assert (x86_64_immediate_operand (*loc, mode));
43657 mode = SImode;
43658 break;
43660 case SImode:
43661 case HImode:
43662 case QImode:
43663 break;
43665 default:
43666 gcc_unreachable ();
43669 /* Avoid overflows. */
43670 if (mode_signbit_p (mode, *loc))
43671 return false;
43673 val = INTVAL (*loc);
43675 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43676 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
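  /* For example, "addl $128, %eax" needs a 32-bit immediate, while the
     equivalent "subl $-128, %eax" fits in a sign-extended 8-bit
     immediate, so 128 is negated here; -128 itself is left alone for
     the same reason.  */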
43677 if ((val < 0 && val != -128)
43678 || val == 128)
43680 *loc = GEN_INT (-val);
43681 return true;
43684 return false;
43687 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43688 optabs would emit if we didn't have TFmode patterns. */
43690 void
43691 x86_emit_floatuns (rtx operands[2])
43693 rtx_code_label *neglab, *donelab;
43694 rtx i0, i1, f0, in, out;
43695 machine_mode mode, inmode;
43697 inmode = GET_MODE (operands[1]);
43698 gcc_assert (inmode == SImode || inmode == DImode);
43700 out = operands[0];
43701 in = force_reg (inmode, operands[1]);
43702 mode = GET_MODE (out);
43703 neglab = gen_label_rtx ();
43704 donelab = gen_label_rtx ();
43705 f0 = gen_reg_rtx (mode);
43707 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43709 expand_float (out, in, 0);
43711 emit_jump_insn (gen_jump (donelab));
43712 emit_barrier ();
43714 emit_label (neglab);
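  /* The input has its sign bit set, so a signed conversion would be
     wrong.  Halve the value, OR the discarded low bit back in so that
     rounding stays correct, convert the halved value, and double the
     result.  */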
43716 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43717 1, OPTAB_DIRECT);
43718 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43719 1, OPTAB_DIRECT);
43720 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43722 expand_float (f0, i0, 0);
43724 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43726 emit_label (donelab);
43729 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43730 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43731 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43732 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43734 /* Get a vector mode of the same size as the original but with elements
43735 twice as wide. This is only guaranteed to apply to integral vectors. */
43737 static inline machine_mode
43738 get_mode_wider_vector (machine_mode o)
43740 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43741 machine_mode n = GET_MODE_WIDER_MODE (o);
43742 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43743 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43744 return n;
43747 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43748 fill target with val via vec_duplicate. */
43750 static bool
43751 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43753 bool ok;
43754 rtx_insn *insn;
43755 rtx dup;
43757 /* First attempt to recognize VAL as-is. */
43758 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43759 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43760 if (recog_memoized (insn) < 0)
43762 rtx_insn *seq;
43763 /* If that fails, force VAL into a register. */
43765 start_sequence ();
43766 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43767 seq = get_insns ();
43768 end_sequence ();
43769 if (seq)
43770 emit_insn_before (seq, insn);
43772 ok = recog_memoized (insn) >= 0;
43773 gcc_assert (ok);
43775 return true;
43778 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43779 with all elements equal to VAR. Return true if successful. */
43781 static bool
43782 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43783 rtx target, rtx val)
43785 bool ok;
43787 switch (mode)
43789 case V2SImode:
43790 case V2SFmode:
43791 if (!mmx_ok)
43792 return false;
43793 /* FALLTHRU */
43795 case V4DFmode:
43796 case V4DImode:
43797 case V8SFmode:
43798 case V8SImode:
43799 case V2DFmode:
43800 case V2DImode:
43801 case V4SFmode:
43802 case V4SImode:
43803 case V16SImode:
43804 case V8DImode:
43805 case V16SFmode:
43806 case V8DFmode:
43807 return ix86_vector_duplicate_value (mode, target, val);
43809 case V4HImode:
43810 if (!mmx_ok)
43811 return false;
43812 if (TARGET_SSE || TARGET_3DNOW_A)
43814 rtx x;
43816 val = gen_lowpart (SImode, val);
43817 x = gen_rtx_TRUNCATE (HImode, val);
43818 x = gen_rtx_VEC_DUPLICATE (mode, x);
43819 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43820 return true;
43822 goto widen;
43824 case V8QImode:
43825 if (!mmx_ok)
43826 return false;
43827 goto widen;
43829 case V8HImode:
43830 if (TARGET_AVX2)
43831 return ix86_vector_duplicate_value (mode, target, val);
43833 if (TARGET_SSE2)
43835 struct expand_vec_perm_d dperm;
43836 rtx tmp1, tmp2;
43838 permute:
43839 memset (&dperm, 0, sizeof (dperm));
43840 dperm.target = target;
43841 dperm.vmode = mode;
43842 dperm.nelt = GET_MODE_NUNITS (mode);
43843 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43844 dperm.one_operand_p = true;
43846 /* Extend to SImode using a paradoxical SUBREG. */
43847 tmp1 = gen_reg_rtx (SImode);
43848 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43850 /* Insert the SImode value as low element of a V4SImode vector. */
43851 tmp2 = gen_reg_rtx (V4SImode);
43852 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43853 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43855 ok = (expand_vec_perm_1 (&dperm)
43856 || expand_vec_perm_broadcast_1 (&dperm));
43857 gcc_assert (ok);
43858 return ok;
43860 goto widen;
43862 case V16QImode:
43863 if (TARGET_AVX2)
43864 return ix86_vector_duplicate_value (mode, target, val);
43866 if (TARGET_SSE2)
43867 goto permute;
43868 goto widen;
43870 widen:
43871 /* Replicate the value once into the next wider mode and recurse. */
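      /* E.g. for V16QImode this builds the HImode value (val << 8) | val
	 and then broadcasts it as a V8HImode vector.  */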
43873 machine_mode smode, wsmode, wvmode;
43874 rtx x;
43876 smode = GET_MODE_INNER (mode);
43877 wvmode = get_mode_wider_vector (mode);
43878 wsmode = GET_MODE_INNER (wvmode);
43880 val = convert_modes (wsmode, smode, val, true);
43881 x = expand_simple_binop (wsmode, ASHIFT, val,
43882 GEN_INT (GET_MODE_BITSIZE (smode)),
43883 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43884 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43886 x = gen_reg_rtx (wvmode);
43887 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43888 gcc_assert (ok);
43889 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43890 return ok;
43893 case V16HImode:
43894 case V32QImode:
43895 if (TARGET_AVX2)
43896 return ix86_vector_duplicate_value (mode, target, val);
43897 else
43899 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43900 rtx x = gen_reg_rtx (hvmode);
43902 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43903 gcc_assert (ok);
43905 x = gen_rtx_VEC_CONCAT (mode, x, x);
43906 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43908 return true;
43910 case V64QImode:
43911 case V32HImode:
43912 if (TARGET_AVX512BW)
43913 return ix86_vector_duplicate_value (mode, target, val);
43914 else
43916 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43917 rtx x = gen_reg_rtx (hvmode);
43919 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43920 gcc_assert (ok);
43922 x = gen_rtx_VEC_CONCAT (mode, x, x);
43923 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43925 return true;
43927 default:
43928 return false;
43932 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43933 whose ONE_VAR element is VAR, and other elements are zero. Return true
43934 if successful. */
43936 static bool
43937 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43938 rtx target, rtx var, int one_var)
43940 machine_mode vsimode;
43941 rtx new_target;
43942 rtx x, tmp;
43943 bool use_vector_set = false;
43945 switch (mode)
43947 case V2DImode:
43948 /* For SSE4.1, we normally use vector set. But if the second
43949 element is zero and inter-unit moves are OK, we use movq
43950 instead. */
43951 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43952 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43953 && one_var == 0));
43954 break;
43955 case V16QImode:
43956 case V4SImode:
43957 case V4SFmode:
43958 use_vector_set = TARGET_SSE4_1;
43959 break;
43960 case V8HImode:
43961 use_vector_set = TARGET_SSE2;
43962 break;
43963 case V4HImode:
43964 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43965 break;
43966 case V32QImode:
43967 case V16HImode:
43968 case V8SImode:
43969 case V8SFmode:
43970 case V4DFmode:
43971 use_vector_set = TARGET_AVX;
43972 break;
43973 case V4DImode:
43974 /* Use ix86_expand_vector_set in 64bit mode only. */
43975 use_vector_set = TARGET_AVX && TARGET_64BIT;
43976 break;
43977 default:
43978 break;
43981 if (use_vector_set)
43983 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43984 var = force_reg (GET_MODE_INNER (mode), var);
43985 ix86_expand_vector_set (mmx_ok, target, var, one_var);
43986 return true;
43989 switch (mode)
43991 case V2SFmode:
43992 case V2SImode:
43993 if (!mmx_ok)
43994 return false;
43995 /* FALLTHRU */
43997 case V2DFmode:
43998 case V2DImode:
43999 if (one_var != 0)
44000 return false;
44001 var = force_reg (GET_MODE_INNER (mode), var);
44002 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44003 emit_insn (gen_rtx_SET (VOIDmode, target, x));
44004 return true;
44006 case V4SFmode:
44007 case V4SImode:
44008 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44009 new_target = gen_reg_rtx (mode);
44010 else
44011 new_target = target;
44012 var = force_reg (GET_MODE_INNER (mode), var);
44013 x = gen_rtx_VEC_DUPLICATE (mode, var);
44014 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44015 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
44016 if (one_var != 0)
44018 /* We need to shuffle the value to the correct position, so
44019 create a new pseudo to store the intermediate result. */
44021 /* With SSE2, we can use the integer shuffle insns. */
44022 if (mode != V4SFmode && TARGET_SSE2)
44024 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44025 const1_rtx,
44026 GEN_INT (one_var == 1 ? 0 : 1),
44027 GEN_INT (one_var == 2 ? 0 : 1),
44028 GEN_INT (one_var == 3 ? 0 : 1)));
44029 if (target != new_target)
44030 emit_move_insn (target, new_target);
44031 return true;
44034 /* Otherwise convert the intermediate result to V4SFmode and
44035 use the SSE1 shuffle instructions. */
44036 if (mode != V4SFmode)
44038 tmp = gen_reg_rtx (V4SFmode);
44039 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44041 else
44042 tmp = new_target;
44044 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44045 const1_rtx,
44046 GEN_INT (one_var == 1 ? 0 : 1),
44047 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44048 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44050 if (mode != V4SFmode)
44051 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44052 else if (tmp != target)
44053 emit_move_insn (target, tmp);
44055 else if (target != new_target)
44056 emit_move_insn (target, new_target);
44057 return true;
44059 case V8HImode:
44060 case V16QImode:
44061 vsimode = V4SImode;
44062 goto widen;
44063 case V4HImode:
44064 case V8QImode:
44065 if (!mmx_ok)
44066 return false;
44067 vsimode = V2SImode;
44068 goto widen;
44069 widen:
44070 if (one_var != 0)
44071 return false;
44073 /* Zero extend the variable element to SImode and recurse. */
44074 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44076 x = gen_reg_rtx (vsimode);
44077 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44078 var, one_var))
44079 gcc_unreachable ();
44081 emit_move_insn (target, gen_lowpart (mode, x));
44082 return true;
44084 default:
44085 return false;
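/* Editorial sketch, not part of i386.c: a scalar model of the SSE2
   pshufd step above.  After the VEC_MERGE, the source holds
   { var, 0, 0, 0 }; each selector is 0 to copy var or 1 to copy a zero
   lane, so var lands only in lane ONE_VAR.  The helper name is
   illustrative only.  */
static void
place_single_lane (int dst[4], const int src[4], int one_var)
{
  dst[0] = src[1];                        /* always a zero lane */
  dst[1] = src[one_var == 1 ? 0 : 1];
  dst[2] = src[one_var == 2 ? 0 : 1];
  dst[3] = src[one_var == 3 ? 0 : 1];
}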
44089 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44090 consisting of the values in VALS. It is known that all elements
44091 except ONE_VAR are constants. Return true if successful. */
44093 static bool
44094 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44095 rtx target, rtx vals, int one_var)
44097 rtx var = XVECEXP (vals, 0, one_var);
44098 machine_mode wmode;
44099 rtx const_vec, x;
44101 const_vec = copy_rtx (vals);
44102 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44103 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44105 switch (mode)
44107 case V2DFmode:
44108 case V2DImode:
44109 case V2SFmode:
44110 case V2SImode:
44111 /* For the two element vectors, it's just as easy to use
44112 the general case. */
44113 return false;
44115 case V4DImode:
44116 /* Use ix86_expand_vector_set in 64bit mode only. */
44117 if (!TARGET_64BIT)
44118 return false;
44119 case V4DFmode:
44120 case V8SFmode:
44121 case V8SImode:
44122 case V16HImode:
44123 case V32QImode:
44124 case V4SFmode:
44125 case V4SImode:
44126 case V8HImode:
44127 case V4HImode:
44128 break;
44130 case V16QImode:
44131 if (TARGET_SSE4_1)
44132 break;
44133 wmode = V8HImode;
44134 goto widen;
44135 case V8QImode:
44136 wmode = V4HImode;
44137 goto widen;
44138 widen:
44139 /* There's no way to set one QImode entry easily. Combine
44140 the variable value with its adjacent constant value, and
44141 promote to an HImode set. */
44142 x = XVECEXP (vals, 0, one_var ^ 1);
44143 if (one_var & 1)
44145 var = convert_modes (HImode, QImode, var, true);
44146 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44147 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44148 x = GEN_INT (INTVAL (x) & 0xff);
44150 else
44152 var = convert_modes (HImode, QImode, var, true);
44153 x = gen_int_mode (INTVAL (x) << 8, HImode);
44155 if (x != const0_rtx)
44156 var = expand_simple_binop (HImode, IOR, var, x, var,
44157 1, OPTAB_LIB_WIDEN);
44159 x = gen_reg_rtx (wmode);
44160 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44161 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44163 emit_move_insn (target, gen_lowpart (mode, x));
44164 return true;
44166 default:
44167 return false;
44170 emit_move_insn (target, const_vec);
44171 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44172 return true;
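/* Editorial sketch, not part of i386.c: a scalar model of the QImode
   "widen" path above.  The variable byte and its constant neighbour are
   packed into one 16-bit lane; with little-endian lane layout an odd
   index puts the variable byte in the high half.  The helper name is
   illustrative only.  */
static unsigned short
pack_byte_pair (unsigned char var, unsigned char neighbour,
		int var_index_is_odd)
{
  if (var_index_is_odd)
    return (unsigned short) ((var << 8) | neighbour);
  return (unsigned short) ((neighbour << 8) | var);
}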
44175 /* A subroutine of ix86_expand_vector_init_general. Use vector
44176 concatenate to handle the most general case: all values variable,
44177 and none identical. */
44179 static void
44180 ix86_expand_vector_init_concat (machine_mode mode,
44181 rtx target, rtx *ops, int n)
44183 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44184 rtx first[16], second[8], third[4];
44185 rtvec v;
44186 int i, j;
44188 switch (n)
44190 case 2:
44191 switch (mode)
44193 case V16SImode:
44194 cmode = V8SImode;
44195 break;
44196 case V16SFmode:
44197 cmode = V8SFmode;
44198 break;
44199 case V8DImode:
44200 cmode = V4DImode;
44201 break;
44202 case V8DFmode:
44203 cmode = V4DFmode;
44204 break;
44205 case V8SImode:
44206 cmode = V4SImode;
44207 break;
44208 case V8SFmode:
44209 cmode = V4SFmode;
44210 break;
44211 case V4DImode:
44212 cmode = V2DImode;
44213 break;
44214 case V4DFmode:
44215 cmode = V2DFmode;
44216 break;
44217 case V4SImode:
44218 cmode = V2SImode;
44219 break;
44220 case V4SFmode:
44221 cmode = V2SFmode;
44222 break;
44223 case V2DImode:
44224 cmode = DImode;
44225 break;
44226 case V2SImode:
44227 cmode = SImode;
44228 break;
44229 case V2DFmode:
44230 cmode = DFmode;
44231 break;
44232 case V2SFmode:
44233 cmode = SFmode;
44234 break;
44235 default:
44236 gcc_unreachable ();
44239 if (!register_operand (ops[1], cmode))
44240 ops[1] = force_reg (cmode, ops[1]);
44241 if (!register_operand (ops[0], cmode))
44242 ops[0] = force_reg (cmode, ops[0]);
44243 emit_insn (gen_rtx_SET (VOIDmode, target,
44244 gen_rtx_VEC_CONCAT (mode, ops[0],
44245 ops[1])));
44246 break;
44248 case 4:
44249 switch (mode)
44251 case V4DImode:
44252 cmode = V2DImode;
44253 break;
44254 case V4DFmode:
44255 cmode = V2DFmode;
44256 break;
44257 case V4SImode:
44258 cmode = V2SImode;
44259 break;
44260 case V4SFmode:
44261 cmode = V2SFmode;
44262 break;
44263 default:
44264 gcc_unreachable ();
44266 goto half;
44268 case 8:
44269 switch (mode)
44271 case V8DImode:
44272 cmode = V2DImode;
44273 hmode = V4DImode;
44274 break;
44275 case V8DFmode:
44276 cmode = V2DFmode;
44277 hmode = V4DFmode;
44278 break;
44279 case V8SImode:
44280 cmode = V2SImode;
44281 hmode = V4SImode;
44282 break;
44283 case V8SFmode:
44284 cmode = V2SFmode;
44285 hmode = V4SFmode;
44286 break;
44287 default:
44288 gcc_unreachable ();
44290 goto half;
44292 case 16:
44293 switch (mode)
44295 case V16SImode:
44296 cmode = V2SImode;
44297 hmode = V4SImode;
44298 gmode = V8SImode;
44299 break;
44300 case V16SFmode:
44301 cmode = V2SFmode;
44302 hmode = V4SFmode;
44303 gmode = V8SFmode;
44304 break;
44305 default:
44306 gcc_unreachable ();
44308 goto half;
44310 half:
44311 /* FIXME: We process inputs backward to help RA. PR 36222. */
44312 i = n - 1;
44313 j = (n >> 1) - 1;
44314 for (; i > 0; i -= 2, j--)
44316 first[j] = gen_reg_rtx (cmode);
44317 v = gen_rtvec (2, ops[i - 1], ops[i]);
44318 ix86_expand_vector_init (false, first[j],
44319 gen_rtx_PARALLEL (cmode, v));
44322 n >>= 1;
44323 if (n > 4)
44325 gcc_assert (hmode != VOIDmode);
44326 gcc_assert (gmode != VOIDmode);
44327 for (i = j = 0; i < n; i += 2, j++)
44329 second[j] = gen_reg_rtx (hmode);
44330 ix86_expand_vector_init_concat (hmode, second [j],
44331 &first [i], 2);
44333 n >>= 1;
44334 for (i = j = 0; i < n; i += 2, j++)
44336 third[j] = gen_reg_rtx (gmode);
44337 ix86_expand_vector_init_concat (gmode, third[j],
44338 &second[i], 2);
44340 n >>= 1;
44341 ix86_expand_vector_init_concat (mode, target, third, n);
44343 else if (n > 2)
44345 gcc_assert (hmode != VOIDmode);
44346 for (i = j = 0; i < n; i += 2, j++)
44348 second[j] = gen_reg_rtx (hmode);
44349 ix86_expand_vector_init_concat (hmode, second [j],
44350 &first [i], 2);
44352 n >>= 1;
44353 ix86_expand_vector_init_concat (mode, target, second, n);
44355 else
44356 ix86_expand_vector_init_concat (mode, target, first, n);
44357 break;
44359 default:
44360 gcc_unreachable ();
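/* Editorial sketch, not part of i386.c: a scalar model of the pairwise
   VEC_CONCAT strategy above, with lanes modelled as ints.  The real code
   builds the levels bottom-up (and walks the operands backward to help
   the register allocator, see PR 36222), but the resulting association
   of halves is the same.  */
static void
concat_recursive (int *dst, int *const *ops, int n, int lanes_per_op)
{
  if (n == 1)
    {
      for (int i = 0; i < lanes_per_op; i++)
	dst[i] = ops[0][i];
      return;
    }
  /* Build each half, then "concatenate" by placing them side by side.  */
  concat_recursive (dst, ops, n / 2, lanes_per_op);
  concat_recursive (dst + (n / 2) * lanes_per_op, ops + n / 2, n / 2,
		    lanes_per_op);
}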
44364 /* A subroutine of ix86_expand_vector_init_general. Use vector
44365 interleave to handle the most general case: all values variable,
44366 and none identical. */
44368 static void
44369 ix86_expand_vector_init_interleave (machine_mode mode,
44370 rtx target, rtx *ops, int n)
44372 machine_mode first_imode, second_imode, third_imode, inner_mode;
44373 int i, j;
44374 rtx op0, op1;
44375 rtx (*gen_load_even) (rtx, rtx, rtx);
44376 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44377 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44379 switch (mode)
44381 case V8HImode:
44382 gen_load_even = gen_vec_setv8hi;
44383 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44384 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44385 inner_mode = HImode;
44386 first_imode = V4SImode;
44387 second_imode = V2DImode;
44388 third_imode = VOIDmode;
44389 break;
44390 case V16QImode:
44391 gen_load_even = gen_vec_setv16qi;
44392 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44393 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44394 inner_mode = QImode;
44395 first_imode = V8HImode;
44396 second_imode = V4SImode;
44397 third_imode = V2DImode;
44398 break;
44399 default:
44400 gcc_unreachable ();
44403 for (i = 0; i < n; i++)
44405 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44406 op0 = gen_reg_rtx (SImode);
44407 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44409 /* Insert the SImode value as low element of V4SImode vector. */
44410 op1 = gen_reg_rtx (V4SImode);
44411 op0 = gen_rtx_VEC_MERGE (V4SImode,
44412 gen_rtx_VEC_DUPLICATE (V4SImode,
44413 op0),
44414 CONST0_RTX (V4SImode),
44415 const1_rtx);
44416 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44418 /* Cast the V4SImode vector back to a vector in the original mode. */
44419 op0 = gen_reg_rtx (mode);
44420 emit_move_insn (op0, gen_lowpart (mode, op1));
44422 /* Load even elements into the second position. */
44423 emit_insn (gen_load_even (op0,
44424 force_reg (inner_mode,
44425 ops [i + i + 1]),
44426 const1_rtx));
44428 /* Cast vector to FIRST_IMODE vector. */
44429 ops[i] = gen_reg_rtx (first_imode);
44430 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44433 /* Interleave low FIRST_IMODE vectors. */
44434 for (i = j = 0; i < n; i += 2, j++)
44436 op0 = gen_reg_rtx (first_imode);
44437 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44439 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44440 ops[j] = gen_reg_rtx (second_imode);
44441 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44444 /* Interleave low SECOND_IMODE vectors. */
44445 switch (second_imode)
44447 case V4SImode:
44448 for (i = j = 0; i < n / 2; i += 2, j++)
44450 op0 = gen_reg_rtx (second_imode);
44451 emit_insn (gen_interleave_second_low (op0, ops[i],
44452 ops[i + 1]));
44454 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44455 vector. */
44456 ops[j] = gen_reg_rtx (third_imode);
44457 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44459 second_imode = V2DImode;
44460 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44461 /* FALLTHRU */
44463 case V2DImode:
44464 op0 = gen_reg_rtx (second_imode);
44465 emit_insn (gen_interleave_second_low (op0, ops[0],
44466 ops[1]));
44468 /* Cast the SECOND_IMODE vector back to a vector in the original
44469 mode. */
44470 emit_insn (gen_rtx_SET (VOIDmode, target,
44471 gen_lowpart (mode, op0)));
44472 break;
44474 default:
44475 gcc_unreachable ();
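/* Editorial sketch, not part of i386.c: a scalar model of the
   "interleave low" primitive (punpckl*) that the expander above applies
   repeatedly.  The low halves of A and B are woven together element by
   element; NELTS is the number of lanes in each input.  */
static void
interleave_low (int *dst, const int *a, const int *b, int nelts)
{
  for (int i = 0; i < nelts / 2; i++)
    {
      dst[2 * i] = a[i];
      dst[2 * i + 1] = b[i];
    }
}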
44479 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44480 all values variable, and none identical. */
44482 static void
44483 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44484 rtx target, rtx vals)
44486 rtx ops[64], op0, op1, op2, op3, op4, op5;
44487 machine_mode half_mode = VOIDmode;
44488 machine_mode quarter_mode = VOIDmode;
44489 int n, i;
44491 switch (mode)
44493 case V2SFmode:
44494 case V2SImode:
44495 if (!mmx_ok && !TARGET_SSE)
44496 break;
44497 /* FALLTHRU */
44499 case V16SImode:
44500 case V16SFmode:
44501 case V8DFmode:
44502 case V8DImode:
44503 case V8SFmode:
44504 case V8SImode:
44505 case V4DFmode:
44506 case V4DImode:
44507 case V4SFmode:
44508 case V4SImode:
44509 case V2DFmode:
44510 case V2DImode:
44511 n = GET_MODE_NUNITS (mode);
44512 for (i = 0; i < n; i++)
44513 ops[i] = XVECEXP (vals, 0, i);
44514 ix86_expand_vector_init_concat (mode, target, ops, n);
44515 return;
44517 case V32QImode:
44518 half_mode = V16QImode;
44519 goto half;
44521 case V16HImode:
44522 half_mode = V8HImode;
44523 goto half;
44525 half:
44526 n = GET_MODE_NUNITS (mode);
44527 for (i = 0; i < n; i++)
44528 ops[i] = XVECEXP (vals, 0, i);
44529 op0 = gen_reg_rtx (half_mode);
44530 op1 = gen_reg_rtx (half_mode);
44531 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44532 n >> 2);
44533 ix86_expand_vector_init_interleave (half_mode, op1,
44534 &ops [n >> 1], n >> 2);
44535 emit_insn (gen_rtx_SET (VOIDmode, target,
44536 gen_rtx_VEC_CONCAT (mode, op0, op1)));
44537 return;
44539 case V64QImode:
44540 quarter_mode = V16QImode;
44541 half_mode = V32QImode;
44542 goto quarter;
44544 case V32HImode:
44545 quarter_mode = V8HImode;
44546 half_mode = V16HImode;
44547 goto quarter;
44549 quarter:
44550 n = GET_MODE_NUNITS (mode);
44551 for (i = 0; i < n; i++)
44552 ops[i] = XVECEXP (vals, 0, i);
44553 op0 = gen_reg_rtx (quarter_mode);
44554 op1 = gen_reg_rtx (quarter_mode);
44555 op2 = gen_reg_rtx (quarter_mode);
44556 op3 = gen_reg_rtx (quarter_mode);
44557 op4 = gen_reg_rtx (half_mode);
44558 op5 = gen_reg_rtx (half_mode);
44559 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44560 n >> 3);
44561 ix86_expand_vector_init_interleave (quarter_mode, op1,
44562 &ops [n >> 2], n >> 3);
44563 ix86_expand_vector_init_interleave (quarter_mode, op2,
44564 &ops [n >> 1], n >> 3);
44565 ix86_expand_vector_init_interleave (quarter_mode, op3,
44566 &ops [(n >> 1) | (n >> 2)], n >> 3);
44567 emit_insn (gen_rtx_SET (VOIDmode, op4,
44568 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44569 emit_insn (gen_rtx_SET (VOIDmode, op5,
44570 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44571 emit_insn (gen_rtx_SET (VOIDmode, target,
44572 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44573 return;
44575 case V16QImode:
44576 if (!TARGET_SSE4_1)
44577 break;
44578 /* FALLTHRU */
44580 case V8HImode:
44581 if (!TARGET_SSE2)
44582 break;
44584 /* Don't use ix86_expand_vector_init_interleave if we can't
44585 move from GPR to SSE register directly. */
44586 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44587 break;
44589 n = GET_MODE_NUNITS (mode);
44590 for (i = 0; i < n; i++)
44591 ops[i] = XVECEXP (vals, 0, i);
44592 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44593 return;
44595 case V4HImode:
44596 case V8QImode:
44597 break;
44599 default:
44600 gcc_unreachable ();
44604 int i, j, n_elts, n_words, n_elt_per_word;
44605 machine_mode inner_mode;
44606 rtx words[4], shift;
44608 inner_mode = GET_MODE_INNER (mode);
44609 n_elts = GET_MODE_NUNITS (mode);
44610 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44611 n_elt_per_word = n_elts / n_words;
44612 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44614 for (i = 0; i < n_words; ++i)
44616 rtx word = NULL_RTX;
44618 for (j = 0; j < n_elt_per_word; ++j)
44620 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44621 elt = convert_modes (word_mode, inner_mode, elt, true);
44623 if (j == 0)
44624 word = elt;
44625 else
44627 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44628 word, 1, OPTAB_LIB_WIDEN);
44629 word = expand_simple_binop (word_mode, IOR, word, elt,
44630 word, 1, OPTAB_LIB_WIDEN);
44634 words[i] = word;
44637 if (n_words == 1)
44638 emit_move_insn (target, gen_lowpart (mode, words[0]));
44639 else if (n_words == 2)
44641 rtx tmp = gen_reg_rtx (mode);
44642 emit_clobber (tmp);
44643 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44644 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44645 emit_move_insn (target, tmp);
44647 else if (n_words == 4)
44649 rtx tmp = gen_reg_rtx (V4SImode);
44650 gcc_assert (word_mode == SImode);
44651 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44652 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44653 emit_move_insn (target, gen_lowpart (mode, tmp));
44655 else
44656 gcc_unreachable ();
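/* Editorial sketch, not part of i386.c: a scalar model of the
   word-building loop above, shown for four 16-bit elements packed into a
   64-bit word.  Elements are folded in from the highest lane down, so
   lane 0 ends up in the least-significant bits (little-endian lane
   order).  */
static unsigned long long
pack_word (const unsigned short elts[4])
{
  unsigned long long word = elts[3];
  for (int j = 2; j >= 0; j--)
    word = (word << 16) | elts[j];
  return word;
}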
44660 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44661 instructions unless MMX_OK is true. */
44663 void
44664 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44666 machine_mode mode = GET_MODE (target);
44667 machine_mode inner_mode = GET_MODE_INNER (mode);
44668 int n_elts = GET_MODE_NUNITS (mode);
44669 int n_var = 0, one_var = -1;
44670 bool all_same = true, all_const_zero = true;
44671 int i;
44672 rtx x;
44674 for (i = 0; i < n_elts; ++i)
44676 x = XVECEXP (vals, 0, i);
44677 if (!(CONST_INT_P (x)
44678 || GET_CODE (x) == CONST_DOUBLE
44679 || GET_CODE (x) == CONST_FIXED))
44680 n_var++, one_var = i;
44681 else if (x != CONST0_RTX (inner_mode))
44682 all_const_zero = false;
44683 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44684 all_same = false;
44687 /* Constants are best loaded from the constant pool. */
44688 if (n_var == 0)
44690 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44691 return;
44694 /* If all values are identical, broadcast the value. */
44695 if (all_same
44696 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44697 XVECEXP (vals, 0, 0)))
44698 return;
44700 /* Values where only one field is non-constant are best loaded from
44701 the pool and overwritten via move later. */
44702 if (n_var == 1)
44704 if (all_const_zero
44705 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44706 XVECEXP (vals, 0, one_var),
44707 one_var))
44708 return;
44710 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44711 return;
44714 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44717 void
44718 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44720 machine_mode mode = GET_MODE (target);
44721 machine_mode inner_mode = GET_MODE_INNER (mode);
44722 machine_mode half_mode;
44723 bool use_vec_merge = false;
44724 rtx tmp;
44725 static rtx (*gen_extract[6][2]) (rtx, rtx)
44727 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44728 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44729 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44730 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44731 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44732 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44734 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44736 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44737 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44738 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44739 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44740 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44741 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44743 int i, j, n;
44745 switch (mode)
44747 case V2SFmode:
44748 case V2SImode:
44749 if (mmx_ok)
44751 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44752 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44753 if (elt == 0)
44754 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44755 else
44756 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44757 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44758 return;
44760 break;
44762 case V2DImode:
44763 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44764 if (use_vec_merge)
44765 break;
44767 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44768 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44769 if (elt == 0)
44770 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44771 else
44772 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44773 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44774 return;
44776 case V2DFmode:
44778 rtx op0, op1;
44780 /* For the two element vectors, we implement a VEC_CONCAT with
44781 the extraction of the other element. */
44783 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44784 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44786 if (elt == 0)
44787 op0 = val, op1 = tmp;
44788 else
44789 op0 = tmp, op1 = val;
44791 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44792 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44794 return;
44796 case V4SFmode:
44797 use_vec_merge = TARGET_SSE4_1;
44798 if (use_vec_merge)
44799 break;
44801 switch (elt)
44803 case 0:
44804 use_vec_merge = true;
44805 break;
44807 case 1:
44808 /* tmp = target = A B C D */
44809 tmp = copy_to_reg (target);
44810 /* target = A A B B */
44811 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44812 /* target = X A B B */
44813 ix86_expand_vector_set (false, target, val, 0);
44814 /* target = A X C D */
44815 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44816 const1_rtx, const0_rtx,
44817 GEN_INT (2+4), GEN_INT (3+4)));
44818 return;
44820 case 2:
44821 /* tmp = target = A B C D */
44822 tmp = copy_to_reg (target);
44823 /* tmp = X B C D */
44824 ix86_expand_vector_set (false, tmp, val, 0);
44825 /* target = A B X D */
44826 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44827 const0_rtx, const1_rtx,
44828 GEN_INT (0+4), GEN_INT (3+4)));
44829 return;
44831 case 3:
44832 /* tmp = target = A B C D */
44833 tmp = copy_to_reg (target);
44834 /* tmp = X B C D */
44835 ix86_expand_vector_set (false, tmp, val, 0);
44836 /* target = A B C X */
44837 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44838 const0_rtx, const1_rtx,
44839 GEN_INT (2+4), GEN_INT (0+4)));
44840 return;
44842 default:
44843 gcc_unreachable ();
44845 break;
44847 case V4SImode:
44848 use_vec_merge = TARGET_SSE4_1;
44849 if (use_vec_merge)
44850 break;
44852 /* Element 0 handled by vec_merge below. */
44853 if (elt == 0)
44855 use_vec_merge = true;
44856 break;
44859 if (TARGET_SSE2)
44861 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44862 store into element 0, then shuffle them back. */
44864 rtx order[4];
44866 order[0] = GEN_INT (elt);
44867 order[1] = const1_rtx;
44868 order[2] = const2_rtx;
44869 order[3] = GEN_INT (3);
44870 order[elt] = const0_rtx;
44872 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44873 order[1], order[2], order[3]));
44875 ix86_expand_vector_set (false, target, val, 0);
44877 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44878 order[1], order[2], order[3]));
44880 else
44882 /* For SSE1, we have to reuse the V4SF code. */
44883 rtx t = gen_reg_rtx (V4SFmode);
44884 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44885 emit_move_insn (target, gen_lowpart (mode, t));
44887 return;
44889 case V8HImode:
44890 use_vec_merge = TARGET_SSE2;
44891 break;
44892 case V4HImode:
44893 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44894 break;
44896 case V16QImode:
44897 use_vec_merge = TARGET_SSE4_1;
44898 break;
44900 case V8QImode:
44901 break;
44903 case V32QImode:
44904 half_mode = V16QImode;
44905 j = 0;
44906 n = 16;
44907 goto half;
44909 case V16HImode:
44910 half_mode = V8HImode;
44911 j = 1;
44912 n = 8;
44913 goto half;
44915 case V8SImode:
44916 half_mode = V4SImode;
44917 j = 2;
44918 n = 4;
44919 goto half;
44921 case V4DImode:
44922 half_mode = V2DImode;
44923 j = 3;
44924 n = 2;
44925 goto half;
44927 case V8SFmode:
44928 half_mode = V4SFmode;
44929 j = 4;
44930 n = 4;
44931 goto half;
44933 case V4DFmode:
44934 half_mode = V2DFmode;
44935 j = 5;
44936 n = 2;
44937 goto half;
44939 half:
44940 /* Compute offset. */
44941 i = elt / n;
44942 elt %= n;
44944 gcc_assert (i <= 1);
44946 /* Extract the half. */
44947 tmp = gen_reg_rtx (half_mode);
44948 emit_insn (gen_extract[j][i] (tmp, target));
44950 /* Put val in tmp at elt. */
44951 ix86_expand_vector_set (false, tmp, val, elt);
44953 /* Put it back. */
44954 emit_insn (gen_insert[j][i] (target, target, tmp));
44955 return;
44957 case V8DFmode:
44958 if (TARGET_AVX512F)
44960 tmp = gen_reg_rtx (mode);
44961 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44962 gen_rtx_VEC_DUPLICATE (mode, val)));
44963 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44964 force_reg (QImode, GEN_INT (1 << elt))));
44965 return;
44967 else
44968 break;
44969 case V8DImode:
44970 if (TARGET_AVX512F)
44972 tmp = gen_reg_rtx (mode);
44973 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44974 gen_rtx_VEC_DUPLICATE (mode, val)));
44975 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44976 force_reg (QImode, GEN_INT (1 << elt))));
44977 return;
44979 else
44980 break;
44981 case V16SFmode:
44982 if (TARGET_AVX512F)
44984 tmp = gen_reg_rtx (mode);
44985 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44986 gen_rtx_VEC_DUPLICATE (mode, val)));
44987 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44988 force_reg (HImode, GEN_INT (1 << elt))));
44989 return;
44991 else
44992 break;
44993 case V16SImode:
44994 if (TARGET_AVX512F)
44996 tmp = gen_reg_rtx (mode);
44997 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44998 gen_rtx_VEC_DUPLICATE (mode, val)));
44999 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
45000 force_reg (HImode, GEN_INT (1 << elt))));
45001 return;
45003 else
45004 break;
45005 case V32HImode:
45006 if (TARGET_AVX512F && TARGET_AVX512BW)
45008 tmp = gen_reg_rtx (mode);
45009 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45010 gen_rtx_VEC_DUPLICATE (mode, val)));
45011 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
45012 force_reg (SImode, GEN_INT (1 << elt))));
45013 return;
45015 else
45016 break;
45017 case V64QImode:
45018 if (TARGET_AVX512F && TARGET_AVX512BW)
45020 tmp = gen_reg_rtx (mode);
45021 emit_insn (gen_rtx_SET (VOIDmode, tmp,
45022 gen_rtx_VEC_DUPLICATE (mode, val)));
45023 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
45024 force_reg (DImode, GEN_INT (1 << elt))));
45025 return;
45027 else
45028 break;
45030 default:
45031 break;
45034 if (use_vec_merge)
45036 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45037 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45038 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45040 else
45042 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45044 emit_move_insn (mem, target);
45046 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45047 emit_move_insn (tmp, val);
45049 emit_move_insn (target, mem);
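/* Editorial sketch, not part of i386.c: the index arithmetic used by the
   256-bit "half" cases above.  For example, inserting into lane 5 of a
   V8SImode vector touches half 1 (the upper 128 bits) at position 1
   within that half; the half is extracted, modified, and re-inserted.
   The helper name is illustrative only.  */
static void
split_lane_index (int elt, int elts_per_half, int *half, int *pos)
{
  *half = elt / elts_per_half;	/* 0 = low 128 bits, 1 = high 128 bits */
  *pos = elt % elts_per_half;
}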
45053 void
45054 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45056 machine_mode mode = GET_MODE (vec);
45057 machine_mode inner_mode = GET_MODE_INNER (mode);
45058 bool use_vec_extr = false;
45059 rtx tmp;
45061 switch (mode)
45063 case V2SImode:
45064 case V2SFmode:
45065 if (!mmx_ok)
45066 break;
45067 /* FALLTHRU */
45069 case V2DFmode:
45070 case V2DImode:
45071 use_vec_extr = true;
45072 break;
45074 case V4SFmode:
45075 use_vec_extr = TARGET_SSE4_1;
45076 if (use_vec_extr)
45077 break;
45079 switch (elt)
45081 case 0:
45082 tmp = vec;
45083 break;
45085 case 1:
45086 case 3:
45087 tmp = gen_reg_rtx (mode);
45088 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45089 GEN_INT (elt), GEN_INT (elt),
45090 GEN_INT (elt+4), GEN_INT (elt+4)));
45091 break;
45093 case 2:
45094 tmp = gen_reg_rtx (mode);
45095 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45096 break;
45098 default:
45099 gcc_unreachable ();
45101 vec = tmp;
45102 use_vec_extr = true;
45103 elt = 0;
45104 break;
45106 case V4SImode:
45107 use_vec_extr = TARGET_SSE4_1;
45108 if (use_vec_extr)
45109 break;
45111 if (TARGET_SSE2)
45113 switch (elt)
45115 case 0:
45116 tmp = vec;
45117 break;
45119 case 1:
45120 case 3:
45121 tmp = gen_reg_rtx (mode);
45122 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45123 GEN_INT (elt), GEN_INT (elt),
45124 GEN_INT (elt), GEN_INT (elt)));
45125 break;
45127 case 2:
45128 tmp = gen_reg_rtx (mode);
45129 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45130 break;
45132 default:
45133 gcc_unreachable ();
45135 vec = tmp;
45136 use_vec_extr = true;
45137 elt = 0;
45139 else
45141 /* For SSE1, we have to reuse the V4SF code. */
45142 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45143 gen_lowpart (V4SFmode, vec), elt);
45144 return;
45146 break;
45148 case V8HImode:
45149 use_vec_extr = TARGET_SSE2;
45150 break;
45151 case V4HImode:
45152 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45153 break;
45155 case V16QImode:
45156 use_vec_extr = TARGET_SSE4_1;
45157 break;
45159 case V8SFmode:
45160 if (TARGET_AVX)
45162 tmp = gen_reg_rtx (V4SFmode);
45163 if (elt < 4)
45164 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45165 else
45166 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45167 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45168 return;
45170 break;
45172 case V4DFmode:
45173 if (TARGET_AVX)
45175 tmp = gen_reg_rtx (V2DFmode);
45176 if (elt < 2)
45177 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45178 else
45179 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45180 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45181 return;
45183 break;
45185 case V32QImode:
45186 if (TARGET_AVX)
45188 tmp = gen_reg_rtx (V16QImode);
45189 if (elt < 16)
45190 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45191 else
45192 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45193 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45194 return;
45196 break;
45198 case V16HImode:
45199 if (TARGET_AVX)
45201 tmp = gen_reg_rtx (V8HImode);
45202 if (elt < 8)
45203 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45204 else
45205 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45206 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45207 return;
45209 break;
45211 case V8SImode:
45212 if (TARGET_AVX)
45214 tmp = gen_reg_rtx (V4SImode);
45215 if (elt < 4)
45216 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45217 else
45218 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45219 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45220 return;
45222 break;
45224 case V4DImode:
45225 if (TARGET_AVX)
45227 tmp = gen_reg_rtx (V2DImode);
45228 if (elt < 2)
45229 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45230 else
45231 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45232 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45233 return;
45235 break;
45237 case V32HImode:
45238 if (TARGET_AVX512BW)
45240 tmp = gen_reg_rtx (V16HImode);
45241 if (elt < 16)
45242 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45243 else
45244 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45245 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45246 return;
45248 break;
45250 case V64QImode:
45251 if (TARGET_AVX512BW)
45253 tmp = gen_reg_rtx (V32QImode);
45254 if (elt < 32)
45255 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45256 else
45257 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45258 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45259 return;
45261 break;
45263 case V16SFmode:
45264 tmp = gen_reg_rtx (V8SFmode);
45265 if (elt < 8)
45266 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45267 else
45268 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45269 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45270 return;
45272 case V8DFmode:
45273 tmp = gen_reg_rtx (V4DFmode);
45274 if (elt < 4)
45275 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45276 else
45277 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45278 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45279 return;
45281 case V16SImode:
45282 tmp = gen_reg_rtx (V8SImode);
45283 if (elt < 8)
45284 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45285 else
45286 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45287 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45288 return;
45290 case V8DImode:
45291 tmp = gen_reg_rtx (V4DImode);
45292 if (elt < 4)
45293 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45294 else
45295 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45296 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45297 return;
45299 case V8QImode:
45300 /* ??? Could extract the appropriate HImode element and shift. */
45301 default:
45302 break;
45305 if (use_vec_extr)
45307 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45308 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45310 /* Let the rtl optimizers know about the zero extension performed. */
45311 if (inner_mode == QImode || inner_mode == HImode)
45313 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45314 target = gen_lowpart (SImode, target);
45317 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45319 else
45321 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45323 emit_move_insn (mem, vec);
45325 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45326 emit_move_insn (target, tmp);
45330 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45331 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45332 The upper bits of DEST are undefined, though they shouldn't cause
45333 exceptions (some bits from src or all zeros are ok). */
45335 static void
45336 emit_reduc_half (rtx dest, rtx src, int i)
45338 rtx tem, d = dest;
45339 switch (GET_MODE (src))
45341 case V4SFmode:
45342 if (i == 128)
45343 tem = gen_sse_movhlps (dest, src, src);
45344 else
45345 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45346 GEN_INT (1 + 4), GEN_INT (1 + 4));
45347 break;
45348 case V2DFmode:
45349 tem = gen_vec_interleave_highv2df (dest, src, src);
45350 break;
45351 case V16QImode:
45352 case V8HImode:
45353 case V4SImode:
45354 case V2DImode:
45355 d = gen_reg_rtx (V1TImode);
45356 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45357 GEN_INT (i / 2));
45358 break;
45359 case V8SFmode:
45360 if (i == 256)
45361 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45362 else
45363 tem = gen_avx_shufps256 (dest, src, src,
45364 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45365 break;
45366 case V4DFmode:
45367 if (i == 256)
45368 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45369 else
45370 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45371 break;
45372 case V32QImode:
45373 case V16HImode:
45374 case V8SImode:
45375 case V4DImode:
45376 if (i == 256)
45378 if (GET_MODE (dest) != V4DImode)
45379 d = gen_reg_rtx (V4DImode);
45380 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45381 gen_lowpart (V4DImode, src),
45382 const1_rtx);
45384 else
45386 d = gen_reg_rtx (V2TImode);
45387 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45388 GEN_INT (i / 2));
45390 break;
45391 case V64QImode:
45392 case V32HImode:
45393 case V16SImode:
45394 case V16SFmode:
45395 case V8DImode:
45396 case V8DFmode:
45397 if (i > 128)
45398 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45399 gen_lowpart (V16SImode, src),
45400 gen_lowpart (V16SImode, src),
45401 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45402 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45403 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45404 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45405 GEN_INT (0xC), GEN_INT (0xD),
45406 GEN_INT (0xE), GEN_INT (0xF),
45407 GEN_INT (0x10), GEN_INT (0x11),
45408 GEN_INT (0x12), GEN_INT (0x13),
45409 GEN_INT (0x14), GEN_INT (0x15),
45410 GEN_INT (0x16), GEN_INT (0x17));
45411 else
45412 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45413 gen_lowpart (V16SImode, src),
45414 GEN_INT (i == 128 ? 0x2 : 0x1),
45415 GEN_INT (0x3),
45416 GEN_INT (0x3),
45417 GEN_INT (0x3),
45418 GEN_INT (i == 128 ? 0x6 : 0x5),
45419 GEN_INT (0x7),
45420 GEN_INT (0x7),
45421 GEN_INT (0x7),
45422 GEN_INT (i == 128 ? 0xA : 0x9),
45423 GEN_INT (0xB),
45424 GEN_INT (0xB),
45425 GEN_INT (0xB),
45426 GEN_INT (i == 128 ? 0xE : 0xD),
45427 GEN_INT (0xF),
45428 GEN_INT (0xF),
45429 GEN_INT (0xF));
45430 break;
45431 default:
45432 gcc_unreachable ();
45434 emit_insn (tem);
45435 if (d != dest)
45436 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45439 /* Expand a vector reduction. FN is the binary pattern to reduce;
45440 DEST is the destination; IN is the input vector. */
45442 void
45443 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45445 rtx half, dst, vec = in;
45446 machine_mode mode = GET_MODE (in);
45447 int i;
45449 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45450 if (TARGET_SSE4_1
45451 && mode == V8HImode
45452 && fn == gen_uminv8hi3)
45454 emit_insn (gen_sse4_1_phminposuw (dest, in));
45455 return;
45458 for (i = GET_MODE_BITSIZE (mode);
45459 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45460 i >>= 1)
45462 half = gen_reg_rtx (mode);
45463 emit_reduc_half (half, vec, i);
45464 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45465 dst = dest;
45466 else
45467 dst = gen_reg_rtx (mode);
45468 emit_insn (fn (dst, half, vec));
45469 vec = dst;
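/* Editorial sketch, not part of i386.c: a scalar model of the log2-step
   reduction driven by the loop above, shown for addition over eight
   lanes.  Each pass combines the vector with a copy of itself shifted
   down by half the remaining width, so the result accumulates in lane 0
   after log2(n) passes.  */
static int
reduce_add_model (int v[8])
{
  for (int width = 8; width > 1; width >>= 1)
    for (int i = 0; i < width / 2; i++)
      v[i] += v[i + width / 2];
  return v[0];
}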
45473 /* Target hook for scalar_mode_supported_p. */
45474 static bool
45475 ix86_scalar_mode_supported_p (machine_mode mode)
45477 if (DECIMAL_FLOAT_MODE_P (mode))
45478 return default_decimal_float_supported_p ();
45479 else if (mode == TFmode)
45480 return true;
45481 else
45482 return default_scalar_mode_supported_p (mode);
45485 /* Implements target hook vector_mode_supported_p. */
45486 static bool
45487 ix86_vector_mode_supported_p (machine_mode mode)
45489 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45490 return true;
45491 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45492 return true;
45493 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45494 return true;
45495 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45496 return true;
45497 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45498 return true;
45499 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45500 return true;
45501 return false;
45504 /* Implement target hook libgcc_floating_mode_supported_p. */
45505 static bool
45506 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45508 switch (mode)
45510 case SFmode:
45511 case DFmode:
45512 case XFmode:
45513 return true;
45515 case TFmode:
45516 #ifdef IX86_NO_LIBGCC_TFMODE
45517 return false;
45518 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45519 return TARGET_LONG_DOUBLE_128;
45520 #else
45521 return true;
45522 #endif
45524 default:
45525 return false;
45529 /* Target hook for c_mode_for_suffix. */
45530 static machine_mode
45531 ix86_c_mode_for_suffix (char suffix)
45533 if (suffix == 'q')
45534 return TFmode;
45535 if (suffix == 'w')
45536 return XFmode;
45538 return VOIDmode;
45541 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45543 We do this in the new i386 backend to maintain source compatibility
45544 with the old cc0-based compiler. */
45546 static tree
45547 ix86_md_asm_clobbers (tree, tree, tree clobbers)
45549 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45550 clobbers);
45551 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45552 clobbers);
45553 return clobbers;
45556 /* Implements target vector targetm.asm.encode_section_info. */
45558 static void ATTRIBUTE_UNUSED
45559 ix86_encode_section_info (tree decl, rtx rtl, int first)
45561 default_encode_section_info (decl, rtl, first);
45563 if (ix86_in_large_data_p (decl))
45564 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45567 /* Worker function for REVERSE_CONDITION. */
45569 enum rtx_code
45570 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45572 return (mode != CCFPmode && mode != CCFPUmode
45573 ? reverse_condition (code)
45574 : reverse_condition_maybe_unordered (code));
45577 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45578 to OPERANDS[0]. */
45580 const char *
45581 output_387_reg_move (rtx insn, rtx *operands)
45583 if (REG_P (operands[0]))
45585 if (REG_P (operands[1])
45586 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45588 if (REGNO (operands[0]) == FIRST_STACK_REG)
45589 return output_387_ffreep (operands, 0);
45590 return "fstp\t%y0";
45592 if (STACK_TOP_P (operands[0]))
45593 return "fld%Z1\t%y1";
45594 return "fst\t%y0";
45596 else if (MEM_P (operands[0]))
45598 gcc_assert (REG_P (operands[1]));
45599 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45600 return "fstp%Z0\t%y0";
45601 else
45603 /* There is no non-popping store to memory for XFmode.
45604 So if we need one, follow the store with a load. */
45605 if (GET_MODE (operands[0]) == XFmode)
45606 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45607 else
45608 return "fst%Z0\t%y0";
45611 else
45612 gcc_unreachable();
45615 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45616 FP status register is set. */
45618 void
45619 ix86_emit_fp_unordered_jump (rtx label)
45621 rtx reg = gen_reg_rtx (HImode);
45622 rtx temp;
45624 emit_insn (gen_x86_fnstsw_1 (reg));
45626 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45628 emit_insn (gen_x86_sahf_1 (reg));
45630 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45631 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45633 else
45635 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45637 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45638 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45641 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45642 gen_rtx_LABEL_REF (VOIDmode, label),
45643 pc_rtx);
45644 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45646 emit_jump_insn (temp);
45647 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45650 /* Output code to perform a log1p XFmode calculation. */
45652 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45654 rtx_code_label *label1 = gen_label_rtx ();
45655 rtx_code_label *label2 = gen_label_rtx ();
45657 rtx tmp = gen_reg_rtx (XFmode);
45658 rtx tmp2 = gen_reg_rtx (XFmode);
45659 rtx test;
45661 emit_insn (gen_absxf2 (tmp, op1));
45662 test = gen_rtx_GE (VOIDmode, tmp,
45663 CONST_DOUBLE_FROM_REAL_VALUE (
45664 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45665 XFmode));
45666 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45668 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45669 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45670 emit_jump (label2);
45672 emit_label (label1);
45673 emit_move_insn (tmp, CONST1_RTX (XFmode));
45674 emit_insn (gen_addxf3 (tmp, op1, tmp));
45675 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45676 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45678 emit_label (label2);
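/* Editorial sketch, not part of i386.c: the identity implemented above.
   fldln2 loads ln(2), and fyl2xp1/fyl2x compute y * log2(x + 1) and
   y * log2(x), so both paths evaluate ln(2) * log2(1 + x) == log1p(x).
   The 0.2928932... cutoff is 1 - sqrt(2)/2, the range in which fyl2xp1
   is documented to stay accurate without forming 1 + x explicitly.  */
#include <math.h>

static double
log1p_model (double x)
{
  const double ln2 = 0.69314718055994530942;	/* fldln2 */
  /* Small |x| uses fyl2xp1 (log2 of x + 1 computed internally, avoiding
     cancellation); larger |x| forms 1 + x and uses fyl2x.  Both equal
     log1p (x).  */
  return ln2 * log2 (1.0 + x);
}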
45681 /* Emit code for round calculation. */
45682 void ix86_emit_i387_round (rtx op0, rtx op1)
45684 machine_mode inmode = GET_MODE (op1);
45685 machine_mode outmode = GET_MODE (op0);
45686 rtx e1, e2, res, tmp, tmp1, half;
45687 rtx scratch = gen_reg_rtx (HImode);
45688 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45689 rtx_code_label *jump_label = gen_label_rtx ();
45690 rtx insn;
45691 rtx (*gen_abs) (rtx, rtx);
45692 rtx (*gen_neg) (rtx, rtx);
45694 switch (inmode)
45696 case SFmode:
45697 gen_abs = gen_abssf2;
45698 break;
45699 case DFmode:
45700 gen_abs = gen_absdf2;
45701 break;
45702 case XFmode:
45703 gen_abs = gen_absxf2;
45704 break;
45705 default:
45706 gcc_unreachable ();
45709 switch (outmode)
45711 case SFmode:
45712 gen_neg = gen_negsf2;
45713 break;
45714 case DFmode:
45715 gen_neg = gen_negdf2;
45716 break;
45717 case XFmode:
45718 gen_neg = gen_negxf2;
45719 break;
45720 case HImode:
45721 gen_neg = gen_neghi2;
45722 break;
45723 case SImode:
45724 gen_neg = gen_negsi2;
45725 break;
45726 case DImode:
45727 gen_neg = gen_negdi2;
45728 break;
45729 default:
45730 gcc_unreachable ();
45733 e1 = gen_reg_rtx (inmode);
45734 e2 = gen_reg_rtx (inmode);
45735 res = gen_reg_rtx (outmode);
45737 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45739 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45741 /* scratch = fxam(op1) */
45742 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45743 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45744 UNSPEC_FXAM)));
45745 /* e1 = fabs(op1) */
45746 emit_insn (gen_abs (e1, op1));
45748 /* e2 = e1 + 0.5 */
45749 half = force_reg (inmode, half);
45750 emit_insn (gen_rtx_SET (VOIDmode, e2,
45751 gen_rtx_PLUS (inmode, e1, half)));
45753 /* res = floor(e2) */
45754 if (inmode != XFmode)
45756 tmp1 = gen_reg_rtx (XFmode);
45758 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45759 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45761 else
45762 tmp1 = e2;
45764 switch (outmode)
45766 case SFmode:
45767 case DFmode:
45769 rtx tmp0 = gen_reg_rtx (XFmode);
45771 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45773 emit_insn (gen_rtx_SET (VOIDmode, res,
45774 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45775 UNSPEC_TRUNC_NOOP)));
45777 break;
45778 case XFmode:
45779 emit_insn (gen_frndintxf2_floor (res, tmp1));
45780 break;
45781 case HImode:
45782 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45783 break;
45784 case SImode:
45785 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45786 break;
45787 case DImode:
45788 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45789 break;
45790 default:
45791 gcc_unreachable ();
45794 /* flags = signbit(a) */
45795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45797 /* if (flags) then res = -res */
45798 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45799 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45800 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45801 pc_rtx);
45802 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
45803 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45804 JUMP_LABEL (insn) = jump_label;
45806 emit_insn (gen_neg (res, res));
45808 emit_label (jump_label);
45809 LABEL_NUSES (jump_label) = 1;
45811 emit_move_insn (op0, res);
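/* Editorial sketch, not part of i386.c: the identity the expander above
   implements, i.e. round-half-away-from-zero built from floor of the
   absolute value; the fxam/test pair plays the role of signbit() and the
   conditional negate restores the sign.  */
#include <math.h>

static double
round_model (double a)
{
  double r = floor (fabs (a) + 0.5);
  return signbit (a) ? -r : r;
}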
45814 /* Output code to perform a Newton-Raphson approximation of a single precision
45815 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
45817 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45819 rtx x0, x1, e0, e1;
45821 x0 = gen_reg_rtx (mode);
45822 e0 = gen_reg_rtx (mode);
45823 e1 = gen_reg_rtx (mode);
45824 x1 = gen_reg_rtx (mode);
45826 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45828 b = force_reg (mode, b);
45830 /* x0 = rcp(b) estimate */
45831 if (mode == V16SFmode || mode == V8DFmode)
45832 emit_insn (gen_rtx_SET (VOIDmode, x0,
45833 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45834 UNSPEC_RCP14)));
45835 else
45836 emit_insn (gen_rtx_SET (VOIDmode, x0,
45837 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45838 UNSPEC_RCP)));
45840 /* e0 = x0 * b */
45841 emit_insn (gen_rtx_SET (VOIDmode, e0,
45842 gen_rtx_MULT (mode, x0, b)));
45844 /* e0 = x0 * e0 */
45845 emit_insn (gen_rtx_SET (VOIDmode, e0,
45846 gen_rtx_MULT (mode, x0, e0)));
45848 /* e1 = x0 + x0 */
45849 emit_insn (gen_rtx_SET (VOIDmode, e1,
45850 gen_rtx_PLUS (mode, x0, x0)));
45852 /* x1 = e1 - e0 */
45853 emit_insn (gen_rtx_SET (VOIDmode, x1,
45854 gen_rtx_MINUS (mode, e1, e0)));
45856 /* res = a * x1 */
45857 emit_insn (gen_rtx_SET (VOIDmode, res,
45858 gen_rtx_MULT (mode, a, x1)));
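/* Editorial sketch, not part of i386.c: the single Newton-Raphson step
   emitted above, written out in scalar C.  x0 stands for the rcpps /
   rcp14 estimate of 1/b; plain division is used here only as a stand-in
   for that estimate.  */
static float
swdiv_model (float a, float b)
{
  float x0 = 1.0f / b;		/* estimate; the real code uses rcpps */
  float e0 = x0 * b;
  e0 = x0 * e0;			/* b * x0 * x0 */
  float e1 = x0 + x0;		/* 2 * x0 */
  float x1 = e1 - e0;		/* refined reciprocal of b */
  return a * x1;
}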
45861 /* Output code to perform a Newton-Raphson approximation of a
45862 single precision floating point [reciprocal] square root. */
45864 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45865 bool recip)
45867 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45868 REAL_VALUE_TYPE r;
45869 int unspec;
45871 x0 = gen_reg_rtx (mode);
45872 e0 = gen_reg_rtx (mode);
45873 e1 = gen_reg_rtx (mode);
45874 e2 = gen_reg_rtx (mode);
45875 e3 = gen_reg_rtx (mode);
45877 real_from_integer (&r, VOIDmode, -3, SIGNED);
45878 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45880 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45881 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45882 unspec = UNSPEC_RSQRT;
45884 if (VECTOR_MODE_P (mode))
45886 mthree = ix86_build_const_vector (mode, true, mthree);
45887 mhalf = ix86_build_const_vector (mode, true, mhalf);
45888 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45889 if (GET_MODE_SIZE (mode) == 64)
45890 unspec = UNSPEC_RSQRT14;
45893 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45894 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45896 a = force_reg (mode, a);
45898 /* x0 = rsqrt(a) estimate */
45899 emit_insn (gen_rtx_SET (VOIDmode, x0,
45900 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
45901 unspec)));
45903 /* If a == 0.0, filter out infinity to prevent NaN for sqrt(0.0). */
45904 if (!recip)
45906 rtx zero, mask;
45908 zero = gen_reg_rtx (mode);
45909 mask = gen_reg_rtx (mode);
45911 zero = force_reg (mode, CONST0_RTX(mode));
45913 /* Handle masked compare. */
45914 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45916 mask = gen_reg_rtx (HImode);
45917 /* Imm value 0x4 corresponds to not-equal comparison. */
45918 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45919 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
45921 else
45923 emit_insn (gen_rtx_SET (VOIDmode, mask,
45924 gen_rtx_NE (mode, zero, a)));
45926 emit_insn (gen_rtx_SET (VOIDmode, x0,
45927 gen_rtx_AND (mode, x0, mask)));
45931 /* e0 = x0 * a */
45932 emit_insn (gen_rtx_SET (VOIDmode, e0,
45933 gen_rtx_MULT (mode, x0, a)));
45934 /* e1 = e0 * x0 */
45935 emit_insn (gen_rtx_SET (VOIDmode, e1,
45936 gen_rtx_MULT (mode, e0, x0)));
45938 /* e2 = e1 - 3. */
45939 mthree = force_reg (mode, mthree);
45940 emit_insn (gen_rtx_SET (VOIDmode, e2,
45941 gen_rtx_PLUS (mode, e1, mthree)));
45943 mhalf = force_reg (mode, mhalf);
45944 if (recip)
45945 /* e3 = -.5 * x0 */
45946 emit_insn (gen_rtx_SET (VOIDmode, e3,
45947 gen_rtx_MULT (mode, x0, mhalf)));
45948 else
45949 /* e3 = -.5 * e0 */
45950 emit_insn (gen_rtx_SET (VOIDmode, e3,
45951 gen_rtx_MULT (mode, e0, mhalf)));
45952 /* ret = e2 * e3 */
45953 emit_insn (gen_rtx_SET (VOIDmode, res,
45954 gen_rtx_MULT (mode, e2, e3)));
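/* Editorial sketch, not part of i386.c: the Newton-Raphson step encoded
   above, in scalar C.  x0 stands for the rsqrtps / rsqrt14 estimate of
   1/sqrt(a); library math is used here only as a stand-in for it.  */
#include <math.h>

static float
swsqrt_model (float a, int recip)
{
  float x0 = 1.0f / sqrtf (a);	/* estimate; the real code uses rsqrtps */
  float e0 = x0 * a;
  float e1 = e0 * x0;		/* a * x0 * x0 */
  float e2 = e1 - 3.0f;
  float e3 = -0.5f * (recip ? x0 : e0);
  return e2 * e3;		/* rsqrt(a), or sqrt(a) when !recip */
}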
45957 #ifdef TARGET_SOLARIS
45958 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45960 static void
45961 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45962 tree decl)
45964 /* With Binutils 2.15, the "@unwind" marker must be specified on
45965 every occurrence of the ".eh_frame" section, not just the first
45966 one. */
45967 if (TARGET_64BIT
45968 && strcmp (name, ".eh_frame") == 0)
45970 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45971 flags & SECTION_WRITE ? "aw" : "a");
45972 return;
45975 #ifndef USE_GAS
45976 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45978 solaris_elf_asm_comdat_section (name, flags, decl);
45979 return;
45981 #endif
45983 default_elf_asm_named_section (name, flags, decl);
45985 #endif /* TARGET_SOLARIS */
45987 /* Return the mangling of TYPE if it is an extended fundamental type. */
45989 static const char *
45990 ix86_mangle_type (const_tree type)
45992 type = TYPE_MAIN_VARIANT (type);
45994 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45995 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45996 return NULL;
45998 switch (TYPE_MODE (type))
46000 case TFmode:
46001 /* __float128 is "g". */
46002 return "g";
46003 case XFmode:
46004 /* "long double" or __float80 is "e". */
46005 return "e";
46006 default:
46007 return NULL;
46011 /* For 32-bit code we can save PIC register setup by using
46012 __stack_chk_fail_local hidden function instead of calling
46013 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46014 register, so it is better to call __stack_chk_fail directly. */
46016 static tree ATTRIBUTE_UNUSED
46017 ix86_stack_protect_fail (void)
46019 return TARGET_64BIT
46020 ? default_external_stack_protect_fail ()
46021 : default_hidden_stack_protect_fail ();
46024 /* Select a format to encode pointers in exception handling data. CODE
46025 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46026 true if the symbol may be affected by dynamic relocations.
46028 ??? All x86 object file formats are capable of representing this.
46029 After all, the relocation needed is the same as for the call insn.
46030 Whether or not a particular assembler allows us to enter such, I
46031 guess we'll have to see. */
46033 int asm_preferred_eh_data_format (int code, int global)
46035 if (flag_pic)
46037 int type = DW_EH_PE_sdata8;
46038 if (!TARGET_64BIT
46039 || ix86_cmodel == CM_SMALL_PIC
46040 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46041 type = DW_EH_PE_sdata4;
46042 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46044 if (ix86_cmodel == CM_SMALL
46045 || (ix86_cmodel == CM_MEDIUM && code))
46046 return DW_EH_PE_udata4;
46047 return DW_EH_PE_absptr;
46050 /* Expand copysign from SIGN to the positive value ABS_VALUE
46051 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46052 the sign-bit. */
46053 static void
46054 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46056 machine_mode mode = GET_MODE (sign);
46057 rtx sgn = gen_reg_rtx (mode);
46058 if (mask == NULL_RTX)
46060 machine_mode vmode;
46062 if (mode == SFmode)
46063 vmode = V4SFmode;
46064 else if (mode == DFmode)
46065 vmode = V2DFmode;
46066 else
46067 vmode = mode;
46069 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46070 if (!VECTOR_MODE_P (mode))
46072 /* We need to generate a scalar mode mask in this case. */
46073 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46074 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46075 mask = gen_reg_rtx (mode);
46076 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46079 else
46080 mask = gen_rtx_NOT (mode, mask);
46081 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46082 gen_rtx_AND (mode, mask, sign)));
46083 emit_insn (gen_rtx_SET (VOIDmode, result,
46084 gen_rtx_IOR (mode, abs_value, sgn)));
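/* Editorial sketch, not part of i386.c: the same AND/IOR bit operation
   for a scalar double.  ABS_VALUE must already have a clear sign bit;
   the sign bit of SIGN is isolated with the mask and merged in.  */
#include <stdint.h>
#include <string.h>

static double
copysign_to_positive_model (double abs_value, double sign)
{
  uint64_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x8000000000000000ull;	/* mask & sign, then IOR */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}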
46087 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46088 mask for masking out the sign-bit is stored in *SMASK, if that is
46089 non-null. */
46090 static rtx
46091 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46093 machine_mode vmode, mode = GET_MODE (op0);
46094 rtx xa, mask;
46096 xa = gen_reg_rtx (mode);
46097 if (mode == SFmode)
46098 vmode = V4SFmode;
46099 else if (mode == DFmode)
46100 vmode = V2DFmode;
46101 else
46102 vmode = mode;
46103 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46104 if (!VECTOR_MODE_P (mode))
46106 /* We need to generate a scalar mode mask in this case. */
46107 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46108 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46109 mask = gen_reg_rtx (mode);
46110 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46112 emit_insn (gen_rtx_SET (VOIDmode, xa,
46113 gen_rtx_AND (mode, op0, mask)));
46115 if (smask)
46116 *smask = mask;
46118 return xa;
46121 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46122 swapping the operands if SWAP_OPERANDS is true. The expanded
46123 code is a forward jump to a newly created label in case the
46124 comparison is true. The generated label rtx is returned. */
46125 static rtx_code_label *
46126 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46127 bool swap_operands)
46129 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46130 rtx_code_label *label;
46131 rtx tmp;
46133 if (swap_operands)
46134 std::swap (op0, op1);
46136 label = gen_label_rtx ();
46137 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46138 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46139 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46140 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46141 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46142 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46143 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46144 JUMP_LABEL (tmp) = label;
46146 return label;
46149 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46150 using comparison code CODE. Operands are swapped for the comparison if
46151 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46152 static rtx
46153 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46154 bool swap_operands)
46156 rtx (*insn)(rtx, rtx, rtx, rtx);
46157 machine_mode mode = GET_MODE (op0);
46158 rtx mask = gen_reg_rtx (mode);
46160 if (swap_operands)
46161 std::swap (op0, op1);
46163 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46165 emit_insn (insn (mask, op0, op1,
46166 gen_rtx_fmt_ee (code, mode, op0, op1)));
46167 return mask;
46170 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46171 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46172 static rtx
46173 ix86_gen_TWO52 (machine_mode mode)
46175 REAL_VALUE_TYPE TWO52r;
46176 rtx TWO52;
46178 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46179 TWO52 = const_double_from_real_value (TWO52r, mode);
46180 TWO52 = force_reg (mode, TWO52);
46182 return TWO52;
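/* Editorial sketch, not part of i386.c: why adding and subtracting 2^52
   rounds a nonnegative double to an integer.  Once x + 2^52 is formed,
   the 52-bit mantissa has no room left for fraction bits, so the add
   rounds x to an integer in the current rounding mode and the subtract
   recovers it.  Valid only for x < 2^52, which is why the callers below
   first branch on !isless (xa, TWO52).  */
static double
round_via_two52 (double x)	/* assumes 0.0 <= x < 2^52 */
{
  volatile double t = x + 4503599627370496.0;	/* 2^52 */
  return t - 4503599627370496.0;
}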
46185 /* Expand SSE sequence for computing lround from OP1 storing
46186 into OP0. */
46187 void
46188 ix86_expand_lround (rtx op0, rtx op1)
46190 /* C code for the stuff we're doing below:
46191 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46192 return (long)tmp;
46194 machine_mode mode = GET_MODE (op1);
46195 const struct real_format *fmt;
46196 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46197 rtx adj;
46199 /* load nextafter (0.5, 0.0) */
46200 fmt = REAL_MODE_FORMAT (mode);
46201 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46202 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46204 /* adj = copysign (0.5, op1) */
46205 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46206 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46208 /* adj = op1 + adj */
46209 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46211 /* op0 = (imode)adj */
46212 expand_fix (op0, adj, 0);
46215 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46216 into OPERAND0. */
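/* The conversion to the integer mode truncates towards zero; the jump
   emitted below skips the adjustment when no correction is needed,
   otherwise the integer result is decremented by one for floor or
   incremented by one for ceil.  */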
46217 void
46218 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46220 /* C code for the stuff we're doing below (for do_floor):
46221 xi = (long)op1;
46222 xi -= (double)xi > op1 ? 1 : 0;
46223 return xi;
46225 machine_mode fmode = GET_MODE (op1);
46226 machine_mode imode = GET_MODE (op0);
46227 rtx ireg, freg, tmp;
46228 rtx_code_label *label;
46230 /* reg = (long)op1 */
46231 ireg = gen_reg_rtx (imode);
46232 expand_fix (ireg, op1, 0);
46234 /* freg = (double)reg */
46235 freg = gen_reg_rtx (fmode);
46236 expand_float (freg, ireg, 0);
46238 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46239 label = ix86_expand_sse_compare_and_jump (UNLE,
46240 freg, op1, !do_floor);
46241 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46242 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46243 emit_move_insn (ireg, tmp);
46245 emit_label (label);
46246 LABEL_NUSES (label) = 1;
46248 emit_move_insn (op0, ireg);
46251 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46252 result in OPERAND0. */
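/* Adding and then subtracting 2**52 rounds XA to an integer in the current
   rounding mode, which is exactly the semantics rint requires; the final
   copysign restores the original sign, so negative inputs and -0.0 keep
   their sign.  */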
46253 void
46254 ix86_expand_rint (rtx operand0, rtx operand1)
46256 /* C code for the stuff we're doing below:
46257 xa = fabs (operand1);
46258 if (!isless (xa, 2**52))
46259 return operand1;
46260 xa = xa + 2**52 - 2**52;
46261 return copysign (xa, operand1);
46263 machine_mode mode = GET_MODE (operand0);
46264 rtx res, xa, TWO52, mask;
46265 rtx_code_label *label;
46267 res = gen_reg_rtx (mode);
46268 emit_move_insn (res, operand1);
46270 /* xa = abs (operand1) */
46271 xa = ix86_expand_sse_fabs (res, &mask);
46273 /* if (!isless (xa, TWO52)) goto label; */
46274 TWO52 = ix86_gen_TWO52 (mode);
46275 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46277 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46278 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46280 ix86_sse_copysign_to_positive (res, xa, res, mask);
46282 emit_label (label);
46283 LABEL_NUSES (label) = 1;
46285 emit_move_insn (operand0, res);
46288 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46289 into OPERAND0. */
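/* The compensation below is branch-free: the all-ones comparison mask is
   ANDed with 1.0 for floor or -1.0 for ceil and then subtracted, so a
   signed zero keeps its sign.  */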
46290 void
46291 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46293 /* C code for the stuff we expand below.
46294 double xa = fabs (x), x2;
46295 if (!isless (xa, TWO52))
46296 return x;
46297 xa = xa + TWO52 - TWO52;
46298 x2 = copysign (xa, x);
46299 Compensate. Floor:
46300 if (x2 > x)
46301 x2 -= 1;
46302 Compensate. Ceil:
46303 if (x2 < x)
46304 x2 -= -1;
46305 return x2;
46307 machine_mode mode = GET_MODE (operand0);
46308 rtx xa, TWO52, tmp, one, res, mask;
46309 rtx_code_label *label;
46311 TWO52 = ix86_gen_TWO52 (mode);
46313 /* Temporary for holding the result, initialized to the input
46314 operand to ease control flow. */
46315 res = gen_reg_rtx (mode);
46316 emit_move_insn (res, operand1);
46318 /* xa = abs (operand1) */
46319 xa = ix86_expand_sse_fabs (res, &mask);
46321 /* if (!isless (xa, TWO52)) goto label; */
46322 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46324 /* xa = xa + TWO52 - TWO52; */
46325 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46326 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46328 /* xa = copysign (xa, operand1) */
46329 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46331 /* generate 1.0 or -1.0 */
46332 one = force_reg (mode,
46333 const_double_from_real_value (do_floor
46334 ? dconst1 : dconstm1, mode));
46336 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46337 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46338 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46339 gen_rtx_AND (mode, one, tmp)));
46340 /* We always need to subtract here to preserve signed zero. */
46341 tmp = expand_simple_binop (mode, MINUS,
46342 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46343 emit_move_insn (res, tmp);
46345 emit_label (label);
46346 LABEL_NUSES (label) = 1;
46348 emit_move_insn (operand0, res);
46351 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46352 into OPERAND0. */
46353 void
46354 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46356 /* C code for the stuff we expand below.
46357 double xa = fabs (x), x2;
46358 if (!isless (xa, TWO52))
46359 return x;
46360 x2 = (double)(long)x;
46361 Compensate. Floor:
46362 if (x2 > x)
46363 x2 -= 1;
46364 Compensate. Ceil:
46365 if (x2 < x)
46366 x2 += 1;
46367 if (HONOR_SIGNED_ZEROS (mode))
46368 return copysign (x2, x);
46369 return x2;
46371 machine_mode mode = GET_MODE (operand0);
46372 rtx xa, xi, TWO52, tmp, one, res, mask;
46373 rtx_code_label *label;
46375 TWO52 = ix86_gen_TWO52 (mode);
46377 /* Temporary for holding the result, initialized to the input
46378 operand to ease control flow. */
46379 res = gen_reg_rtx (mode);
46380 emit_move_insn (res, operand1);
46382 /* xa = abs (operand1) */
46383 xa = ix86_expand_sse_fabs (res, &mask);
46385 /* if (!isless (xa, TWO52)) goto label; */
46386 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46388 /* xa = (double)(long)x */
46389 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46390 expand_fix (xi, res, 0);
46391 expand_float (xa, xi, 0);
46393 /* generate 1.0 */
46394 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46396 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46397 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46398 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46399 gen_rtx_AND (mode, one, tmp)));
46400 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46401 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46402 emit_move_insn (res, tmp);
46404 if (HONOR_SIGNED_ZEROS (mode))
46405 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46407 emit_label (label);
46408 LABEL_NUSES (label) = 1;
46410 emit_move_insn (operand0, res);
46413 /* Expand SSE sequence for computing round from OPERAND1 storing
46414 into OPERAND0. The sequence works without relying on DImode truncation
46415 via cvttsd2siq, which is only available on 64-bit targets. */
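/* DXA is the error introduced by the TWO52 addition: if it is greater than
   0.5 the intermediate result was rounded up too far and one is subtracted,
   and if it is at most -0.5 it was rounded down too far and one is added,
   giving the round-half-away-from-zero behaviour expected of round.  */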
46416 void
46417 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46419 /* C code for the stuff we expand below.
46420 double xa = fabs (x), xa2, x2;
46421 if (!isless (xa, TWO52))
46422 return x;
46423 Using the absolute value and copying back sign makes
46424 -0.0 -> -0.0 correct.
46425 xa2 = xa + TWO52 - TWO52;
46426 Compensate.
46427 dxa = xa2 - xa;
46428 if (dxa <= -0.5)
46429 xa2 += 1;
46430 else if (dxa > 0.5)
46431 xa2 -= 1;
46432 x2 = copysign (xa2, x);
46433 return x2;
46435 machine_mode mode = GET_MODE (operand0);
46436 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46437 rtx_code_label *label;
46439 TWO52 = ix86_gen_TWO52 (mode);
46441 /* Temporary for holding the result, initialized to the input
46442 operand to ease control flow. */
46443 res = gen_reg_rtx (mode);
46444 emit_move_insn (res, operand1);
46446 /* xa = abs (operand1) */
46447 xa = ix86_expand_sse_fabs (res, &mask);
46449 /* if (!isless (xa, TWO52)) goto label; */
46450 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46452 /* xa2 = xa + TWO52 - TWO52; */
46453 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46454 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46456 /* dxa = xa2 - xa; */
46457 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46459 /* generate 0.5, 1.0 and -0.5 */
46460 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46461 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46462 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46463 0, OPTAB_DIRECT);
46465 /* Compensate. */
46466 tmp = gen_reg_rtx (mode);
46467 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46468 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46469 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46470 gen_rtx_AND (mode, one, tmp)));
46471 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46472 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46473 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46474 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46475 gen_rtx_AND (mode, one, tmp)));
46476 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46478 /* res = copysign (xa2, operand1) */
46479 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46481 emit_label (label);
46482 LABEL_NUSES (label) = 1;
46484 emit_move_insn (operand0, res);
46487 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46488 into OPERAND0. */
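/* The float-to-integer-to-float round trip below truncates towards zero;
   it is only reached when fabs (x) < 2**52 (2**23 for SFmode), so the
   value always fits in DImode (SImode for SFmode).  */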
46489 void
46490 ix86_expand_trunc (rtx operand0, rtx operand1)
46492 /* C code for SSE variant we expand below.
46493 double xa = fabs (x), x2;
46494 if (!isless (xa, TWO52))
46495 return x;
46496 x2 = (double)(long)x;
46497 if (HONOR_SIGNED_ZEROS (mode))
46498 return copysign (x2, x);
46499 return x2;
46501 machine_mode mode = GET_MODE (operand0);
46502 rtx xa, xi, TWO52, res, mask;
46503 rtx_code_label *label;
46505 TWO52 = ix86_gen_TWO52 (mode);
46507 /* Temporary for holding the result, initialized to the input
46508 operand to ease control flow. */
46509 res = gen_reg_rtx (mode);
46510 emit_move_insn (res, operand1);
46512 /* xa = abs (operand1) */
46513 xa = ix86_expand_sse_fabs (res, &mask);
46515 /* if (!isless (xa, TWO52)) goto label; */
46516 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46518 /* x = (double)(long)x */
46519 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46520 expand_fix (xi, res, 0);
46521 expand_float (res, xi, 0);
46523 if (HONOR_SIGNED_ZEROS (mode))
46524 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46526 emit_label (label);
46527 LABEL_NUSES (label) = 1;
46529 emit_move_insn (operand0, res);
46532 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46533 into OPERAND0. */
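/* Unlike ix86_expand_trunc this variant avoids the DImode conversion: it
   truncates the absolute value with the TWO52 trick and subtracts one
   whenever the addition rounded the magnitude up.  */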
46534 void
46535 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46537 machine_mode mode = GET_MODE (operand0);
46538 rtx xa, mask, TWO52, one, res, smask, tmp;
46539 rtx_code_label *label;
46541 /* C code for SSE variant we expand below.
46542 double xa = fabs (x), x2;
46543 if (!isless (xa, TWO52))
46544 return x;
46545 xa2 = xa + TWO52 - TWO52;
46546 Compensate:
46547 if (xa2 > xa)
46548 xa2 -= 1.0;
46549 x2 = copysign (xa2, x);
46550 return x2;
46553 TWO52 = ix86_gen_TWO52 (mode);
46555 /* Temporary for holding the result, initialized to the input
46556 operand to ease control flow. */
46557 res = gen_reg_rtx (mode);
46558 emit_move_insn (res, operand1);
46560 /* xa = abs (operand1) */
46561 xa = ix86_expand_sse_fabs (res, &smask);
46563 /* if (!isless (xa, TWO52)) goto label; */
46564 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46566 /* res = xa + TWO52 - TWO52; */
46567 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46568 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46569 emit_move_insn (res, tmp);
46571 /* generate 1.0 */
46572 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46574 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46575 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46576 emit_insn (gen_rtx_SET (VOIDmode, mask,
46577 gen_rtx_AND (mode, mask, one)));
46578 tmp = expand_simple_binop (mode, MINUS,
46579 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46580 emit_move_insn (res, tmp);
46582 /* res = copysign (res, operand1) */
46583 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46585 emit_label (label);
46586 LABEL_NUSES (label) = 1;
46588 emit_move_insn (operand0, res);
46591 /* Expand SSE sequence for computing round from OPERAND1 storing
46592 into OPERAND0. */
46593 void
46594 ix86_expand_round (rtx operand0, rtx operand1)
46596 /* C code for the stuff we're doing below:
46597 double xa = fabs (x);
46598 if (!isless (xa, TWO52))
46599 return x;
46600 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46601 return copysign (xa, x);
46603 machine_mode mode = GET_MODE (operand0);
46604 rtx res, TWO52, xa, xi, half, mask;
46605 rtx_code_label *label;
46606 const struct real_format *fmt;
46607 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46609 /* Temporary for holding the result, initialized to the input
46610 operand to ease control flow. */
46611 res = gen_reg_rtx (mode);
46612 emit_move_insn (res, operand1);
46614 TWO52 = ix86_gen_TWO52 (mode);
46615 xa = ix86_expand_sse_fabs (res, &mask);
46616 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46618 /* load nextafter (0.5, 0.0) */
46619 fmt = REAL_MODE_FORMAT (mode);
46620 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46621 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46623 /* xa = xa + 0.5 */
46624 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46625 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46627 /* xa = (double)(int64_t)xa */
46628 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46629 expand_fix (xi, xa, 0);
46630 expand_float (xa, xi, 0);
46632 /* res = copysign (xa, operand1) */
46633 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46635 emit_label (label);
46636 LABEL_NUSES (label) = 1;
46638 emit_move_insn (operand0, res);
46641 /* Expand SSE sequence for computing round
46642 from OP1 storing into OP0 using the SSE4.1 round insn. */
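/* round (a) is computed as trunc (a + copysign (nextafter (0.5, 0.0), a));
   the truncation is done by the SSE4.1 round instruction with the
   ROUND_TRUNC immediate, so no compare and branch against 2**52 is
   needed.  */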
46643 void
46644 ix86_expand_round_sse4 (rtx op0, rtx op1)
46646 machine_mode mode = GET_MODE (op0);
46647 rtx e1, e2, res, half;
46648 const struct real_format *fmt;
46649 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46650 rtx (*gen_copysign) (rtx, rtx, rtx);
46651 rtx (*gen_round) (rtx, rtx, rtx);
46653 switch (mode)
46655 case SFmode:
46656 gen_copysign = gen_copysignsf3;
46657 gen_round = gen_sse4_1_roundsf2;
46658 break;
46659 case DFmode:
46660 gen_copysign = gen_copysigndf3;
46661 gen_round = gen_sse4_1_rounddf2;
46662 break;
46663 default:
46664 gcc_unreachable ();
46667 /* round (a) = trunc (a + copysign (0.5, a)) */
46669 /* load nextafter (0.5, 0.0) */
46670 fmt = REAL_MODE_FORMAT (mode);
46671 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46672 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46673 half = const_double_from_real_value (pred_half, mode);
46675 /* e1 = copysign (0.5, op1) */
46676 e1 = gen_reg_rtx (mode);
46677 emit_insn (gen_copysign (e1, half, op1));
46679 /* e2 = op1 + e1 */
46680 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46682 /* res = trunc (e2) */
46683 res = gen_reg_rtx (mode);
46684 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46686 emit_move_insn (op0, res);
46690 /* Table of valid machine attributes. */
46691 static const struct attribute_spec ix86_attribute_table[] =
46693 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46694 affects_type_identity } */
46695 /* Stdcall attribute says callee is responsible for popping arguments
46696 if they are not variable. */
46697 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46698 true },
46699 /* Fastcall attribute says callee is responsible for popping arguments
46700 if they are not variable. */
46701 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46702 true },
46703 /* Thiscall attribute says callee is responsible for popping arguments
46704 if they are not variable. */
46705 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46706 true },
46707 /* Cdecl attribute says the callee is a normal C declaration */
46708 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46709 true },
46710 /* Regparm attribute specifies how many integer arguments are to be
46711 passed in registers. */
46712 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46713 true },
46714 /* Sseregparm attribute says we are using x86_64 calling conventions
46715 for FP arguments. */
46716 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46717 true },
46718 /* The transactional memory builtins are implicitly regparm or fastcall
46719 depending on the ABI. Override the generic do-nothing attribute that
46720 these builtins were declared with. */
46721 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46722 true },
46723 /* force_align_arg_pointer says this function realigns the stack at entry. */
46724 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46725 false, true, true, ix86_handle_cconv_attribute, false },
46726 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46727 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46728 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46729 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46730 false },
46731 #endif
46732 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46733 false },
46734 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46735 false },
46736 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46737 SUBTARGET_ATTRIBUTE_TABLE,
46738 #endif
46739 /* ms_abi and sysv_abi calling convention function attributes. */
46740 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46741 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46742 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46743 false },
46744 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46745 ix86_handle_callee_pop_aggregate_return, true },
46746 /* End element. */
46747 { NULL, 0, 0, false, false, false, NULL, false }
46750 /* Implement targetm.vectorize.builtin_vectorization_cost. */
46751 static int
46752 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46753 tree vectype, int)
46755 unsigned elements;
46757 switch (type_of_cost)
46759 case scalar_stmt:
46760 return ix86_cost->scalar_stmt_cost;
46762 case scalar_load:
46763 return ix86_cost->scalar_load_cost;
46765 case scalar_store:
46766 return ix86_cost->scalar_store_cost;
46768 case vector_stmt:
46769 return ix86_cost->vec_stmt_cost;
46771 case vector_load:
46772 return ix86_cost->vec_align_load_cost;
46774 case vector_store:
46775 return ix86_cost->vec_store_cost;
46777 case vec_to_scalar:
46778 return ix86_cost->vec_to_scalar_cost;
46780 case scalar_to_vec:
46781 return ix86_cost->scalar_to_vec_cost;
46783 case unaligned_load:
46784 case unaligned_store:
46785 return ix86_cost->vec_unalign_load_cost;
46787 case cond_branch_taken:
46788 return ix86_cost->cond_taken_branch_cost;
46790 case cond_branch_not_taken:
46791 return ix86_cost->cond_not_taken_branch_cost;
46793 case vec_perm:
46794 case vec_promote_demote:
46795 return ix86_cost->vec_stmt_cost;
46797 case vec_construct:
46798 elements = TYPE_VECTOR_SUBPARTS (vectype);
46799 return elements / 2 + 1;
46801 default:
46802 gcc_unreachable ();
46806 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46807 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46808 insn every time. */
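/* The insn is allocated once in GC memory; expand_vselect and
   expand_vselect_vconcat temporarily splice the real operands and mode
   into it, run recog_memoized on it, and then restore the placeholders.  */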
46810 static GTY(()) rtx_insn *vselect_insn;
46812 /* Initialize vselect_insn. */
46814 static void
46815 init_vselect_insn (void)
46817 unsigned i;
46818 rtx x;
46820 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46821 for (i = 0; i < MAX_VECT_LEN; ++i)
46822 XVECEXP (x, 0, i) = const0_rtx;
46823 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46824 const0_rtx), x);
46825 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46826 start_sequence ();
46827 vselect_insn = emit_insn (x);
46828 end_sequence ();
46831 /* Construct (set target (vec_select op0 (parallel perm))) and
46832 return true if that's a valid instruction in the active ISA. */
46834 static bool
46835 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46836 unsigned nelt, bool testing_p)
46838 unsigned int i;
46839 rtx x, save_vconcat;
46840 int icode;
46842 if (vselect_insn == NULL_RTX)
46843 init_vselect_insn ();
46845 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46846 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46847 for (i = 0; i < nelt; ++i)
46848 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
46849 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46850 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46851 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46852 SET_DEST (PATTERN (vselect_insn)) = target;
46853 icode = recog_memoized (vselect_insn);
46855 if (icode >= 0 && !testing_p)
46856 emit_insn (copy_rtx (PATTERN (vselect_insn)));
46858 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46859 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46860 INSN_CODE (vselect_insn) = -1;
46862 return icode >= 0;
46865 /* Similar, but generate a vec_concat from op0 and op1 as well. */
46867 static bool
46868 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46869 const unsigned char *perm, unsigned nelt,
46870 bool testing_p)
46872 machine_mode v2mode;
46873 rtx x;
46874 bool ok;
46876 if (vselect_insn == NULL_RTX)
46877 init_vselect_insn ();
46879 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46880 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46881 PUT_MODE (x, v2mode);
46882 XEXP (x, 0) = op0;
46883 XEXP (x, 1) = op1;
46884 ok = expand_vselect (target, x, perm, nelt, testing_p);
46885 XEXP (x, 0) = const0_rtx;
46886 XEXP (x, 1) = const0_rtx;
46887 return ok;
46890 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46891 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
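/* A blend never moves an element to a new position: destination element I
   must come from element I of one of the two operands, which is what the
   loop below checks before any code is emitted.  */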
46893 static bool
46894 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46896 machine_mode vmode = d->vmode;
46897 unsigned i, mask, nelt = d->nelt;
46898 rtx target, op0, op1, x;
46899 rtx rperm[32], vperm;
46901 if (d->one_operand_p)
46902 return false;
46903 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46904 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46906 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46908 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46910 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46912 else
46913 return false;
46915 /* This is a blend, not a permute. Elements must stay in their
46916 respective lanes. */
46917 for (i = 0; i < nelt; ++i)
46919 unsigned e = d->perm[i];
46920 if (!(e == i || e == i + nelt))
46921 return false;
46924 if (d->testing_p)
46925 return true;
46927 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46928 decision should be extracted elsewhere, so that we only try that
46929 sequence once all budget==3 options have been tried. */
46930 target = d->target;
46931 op0 = d->op0;
46932 op1 = d->op1;
46933 mask = 0;
46935 switch (vmode)
46937 case V8DFmode:
46938 case V16SFmode:
46939 case V4DFmode:
46940 case V8SFmode:
46941 case V2DFmode:
46942 case V4SFmode:
46943 case V8HImode:
46944 case V8SImode:
46945 case V32HImode:
46946 case V64QImode:
46947 case V16SImode:
46948 case V8DImode:
46949 for (i = 0; i < nelt; ++i)
46950 mask |= (d->perm[i] >= nelt) << i;
46951 break;
46953 case V2DImode:
46954 for (i = 0; i < 2; ++i)
46955 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
46956 vmode = V8HImode;
46957 goto do_subreg;
46959 case V4SImode:
46960 for (i = 0; i < 4; ++i)
46961 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46962 vmode = V8HImode;
46963 goto do_subreg;
46965 case V16QImode:
46966 /* See if bytes move in pairs so we can use pblendw with
46967 an immediate argument, rather than pblendvb with a vector
46968 argument. */
46969 for (i = 0; i < 16; i += 2)
46970 if (d->perm[i] + 1 != d->perm[i + 1])
46972 use_pblendvb:
46973 for (i = 0; i < nelt; ++i)
46974 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46976 finish_pblendvb:
46977 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46978 vperm = force_reg (vmode, vperm);
46980 if (GET_MODE_SIZE (vmode) == 16)
46981 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46982 else
46983 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
46984 if (target != d->target)
46985 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46986 return true;
46989 for (i = 0; i < 8; ++i)
46990 mask |= (d->perm[i * 2] >= 16) << i;
46991 vmode = V8HImode;
46992 /* FALLTHRU */
46994 do_subreg:
46995 target = gen_reg_rtx (vmode);
46996 op0 = gen_lowpart (vmode, op0);
46997 op1 = gen_lowpart (vmode, op1);
46998 break;
47000 case V32QImode:
47001 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47002 for (i = 0; i < 32; i += 2)
47003 if (d->perm[i] + 1 != d->perm[i + 1])
47004 goto use_pblendvb;
47005 /* See if bytes move in quadruplets. If yes, vpblendd
47006 with immediate can be used. */
47007 for (i = 0; i < 32; i += 4)
47008 if (d->perm[i] + 2 != d->perm[i + 2])
47009 break;
47010 if (i < 32)
47012 /* See if bytes move the same in both lanes. If yes,
47013 vpblendw with immediate can be used. */
47014 for (i = 0; i < 16; i += 2)
47015 if (d->perm[i] + 16 != d->perm[i + 16])
47016 goto use_pblendvb;
47018 /* Use vpblendw. */
47019 for (i = 0; i < 16; ++i)
47020 mask |= (d->perm[i * 2] >= 32) << i;
47021 vmode = V16HImode;
47022 goto do_subreg;
47025 /* Use vpblendd. */
47026 for (i = 0; i < 8; ++i)
47027 mask |= (d->perm[i * 4] >= 32) << i;
47028 vmode = V8SImode;
47029 goto do_subreg;
47031 case V16HImode:
47032 /* See if words move in pairs. If yes, vpblendd can be used. */
47033 for (i = 0; i < 16; i += 2)
47034 if (d->perm[i] + 1 != d->perm[i + 1])
47035 break;
47036 if (i < 16)
47038 /* See if words move the same in both lanes. If not,
47039 vpblendvb must be used. */
47040 for (i = 0; i < 8; i++)
47041 if (d->perm[i] + 8 != d->perm[i + 8])
47043 /* Use vpblendvb. */
47044 for (i = 0; i < 32; ++i)
47045 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47047 vmode = V32QImode;
47048 nelt = 32;
47049 target = gen_reg_rtx (vmode);
47050 op0 = gen_lowpart (vmode, op0);
47051 op1 = gen_lowpart (vmode, op1);
47052 goto finish_pblendvb;
47055 /* Use vpblendw. */
47056 for (i = 0; i < 16; ++i)
47057 mask |= (d->perm[i] >= 16) << i;
47058 break;
47061 /* Use vpblendd. */
47062 for (i = 0; i < 8; ++i)
47063 mask |= (d->perm[i * 2] >= 16) << i;
47064 vmode = V8SImode;
47065 goto do_subreg;
47067 case V4DImode:
47068 /* Use vpblendd. */
47069 for (i = 0; i < 4; ++i)
47070 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47071 vmode = V8SImode;
47072 goto do_subreg;
47074 default:
47075 gcc_unreachable ();
47078 /* This matches five different patterns with the different modes. */
47079 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
47080 x = gen_rtx_SET (VOIDmode, target, x);
47081 emit_insn (x);
47082 if (target != d->target)
47083 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47085 return true;
47088 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47089 in terms of the variable form of vpermilps.
47091 Note that we will have already failed the immediate input vpermilps,
47092 which requires that the high and low part shuffle be identical; the
47093 variable form doesn't require that. */
47095 static bool
47096 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47098 rtx rperm[8], vperm;
47099 unsigned i;
47101 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47102 return false;
47104 /* We can only permute within the 128-bit lane. */
47105 for (i = 0; i < 8; ++i)
47107 unsigned e = d->perm[i];
47108 if (i < 4 ? e >= 4 : e < 4)
47109 return false;
47112 if (d->testing_p)
47113 return true;
47115 for (i = 0; i < 8; ++i)
47117 unsigned e = d->perm[i];
47119 /* Within each 128-bit lane, the elements of op0 are numbered
47120 from 0 and the elements of op1 are numbered from 4. */
47121 if (e >= 8 + 4)
47122 e -= 8;
47123 else if (e >= 4)
47124 e -= 4;
47126 rperm[i] = GEN_INT (e);
47129 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47130 vperm = force_reg (V8SImode, vperm);
47131 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47133 return true;
47136 /* Return true if permutation D can be performed as a VMODE permutation
47137 instead. */
47139 static bool
47140 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47142 unsigned int i, j, chunk;
47144 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47145 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47146 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47147 return false;
47149 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47150 return true;
47152 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47153 for (i = 0; i < d->nelt; i += chunk)
47154 if (d->perm[i] & (chunk - 1))
47155 return false;
47156 else
47157 for (j = 1; j < chunk; ++j)
47158 if (d->perm[i] + j != d->perm[i + j])
47159 return false;
47161 return true;
47164 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47165 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47167 static bool
47168 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47170 unsigned i, nelt, eltsz, mask;
47171 unsigned char perm[64];
47172 machine_mode vmode = V16QImode;
47173 rtx rperm[64], vperm, target, op0, op1;
47175 nelt = d->nelt;
47177 if (!d->one_operand_p)
47179 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47181 if (TARGET_AVX2
47182 && valid_perm_using_mode_p (V2TImode, d))
47184 if (d->testing_p)
47185 return true;
47187 /* Use vperm2i128 insn. The pattern uses
47188 V4DImode instead of V2TImode. */
47189 target = d->target;
47190 if (d->vmode != V4DImode)
47191 target = gen_reg_rtx (V4DImode);
47192 op0 = gen_lowpart (V4DImode, d->op0);
47193 op1 = gen_lowpart (V4DImode, d->op1);
47194 rperm[0]
47195 = GEN_INT ((d->perm[0] / (nelt / 2))
47196 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47197 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47198 if (target != d->target)
47199 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47200 return true;
47202 return false;
47205 else
47207 if (GET_MODE_SIZE (d->vmode) == 16)
47209 if (!TARGET_SSSE3)
47210 return false;
47212 else if (GET_MODE_SIZE (d->vmode) == 32)
47214 if (!TARGET_AVX2)
47215 return false;
47217 /* V4DImode should be already handled through
47218 expand_vselect by vpermq instruction. */
47219 gcc_assert (d->vmode != V4DImode);
47221 vmode = V32QImode;
47222 if (d->vmode == V8SImode
47223 || d->vmode == V16HImode
47224 || d->vmode == V32QImode)
47226 /* First see if vpermq can be used for
47227 V8SImode/V16HImode/V32QImode. */
47228 if (valid_perm_using_mode_p (V4DImode, d))
47230 for (i = 0; i < 4; i++)
47231 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47232 if (d->testing_p)
47233 return true;
47234 target = gen_reg_rtx (V4DImode);
47235 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47236 perm, 4, false))
47238 emit_move_insn (d->target,
47239 gen_lowpart (d->vmode, target));
47240 return true;
47242 return false;
47245 /* Next see if vpermd can be used. */
47246 if (valid_perm_using_mode_p (V8SImode, d))
47247 vmode = V8SImode;
47249 /* Or if vpermps can be used. */
47250 else if (d->vmode == V8SFmode)
47251 vmode = V8SImode;
47253 if (vmode == V32QImode)
47255 /* vpshufb only works within 128-bit lanes; it is not
47256 possible to shuffle bytes between the lanes. */
47257 for (i = 0; i < nelt; ++i)
47258 if ((d->perm[i] ^ i) & (nelt / 2))
47259 return false;
47262 else if (GET_MODE_SIZE (d->vmode) == 64)
47264 if (!TARGET_AVX512BW)
47265 return false;
47267 /* If vpermq didn't work, vpshufb won't work either. */
47268 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47269 return false;
47271 vmode = V64QImode;
47272 if (d->vmode == V16SImode
47273 || d->vmode == V32HImode
47274 || d->vmode == V64QImode)
47276 /* First see if vpermq can be used for
47277 V16SImode/V32HImode/V64QImode. */
47278 if (valid_perm_using_mode_p (V8DImode, d))
47280 for (i = 0; i < 8; i++)
47281 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47282 if (d->testing_p)
47283 return true;
47284 target = gen_reg_rtx (V8DImode);
47285 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47286 perm, 8, false))
47288 emit_move_insn (d->target,
47289 gen_lowpart (d->vmode, target));
47290 return true;
47292 return false;
47295 /* Next see if vpermd can be used. */
47296 if (valid_perm_using_mode_p (V16SImode, d))
47297 vmode = V16SImode;
47299 /* Or if vpermps can be used. */
47300 else if (d->vmode == V16SFmode)
47301 vmode = V16SImode;
47302 if (vmode == V64QImode)
47304 /* vpshufb only works within 128-bit lanes; it is not
47305 possible to shuffle bytes between the lanes. */
47306 for (i = 0; i < nelt; ++i)
47307 if ((d->perm[i] ^ i) & (nelt / 4))
47308 return false;
47311 else
47312 return false;
47315 if (d->testing_p)
47316 return true;
47318 if (vmode == V8SImode)
47319 for (i = 0; i < 8; ++i)
47320 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47321 else if (vmode == V16SImode)
47322 for (i = 0; i < 16; ++i)
47323 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47324 else
47326 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47327 if (!d->one_operand_p)
47328 mask = 2 * nelt - 1;
47329 else if (vmode == V16QImode)
47330 mask = nelt - 1;
47331 else if (vmode == V64QImode)
47332 mask = nelt / 4 - 1;
47333 else
47334 mask = nelt / 2 - 1;
47336 for (i = 0; i < nelt; ++i)
47338 unsigned j, e = d->perm[i] & mask;
47339 for (j = 0; j < eltsz; ++j)
47340 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47344 vperm = gen_rtx_CONST_VECTOR (vmode,
47345 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47346 vperm = force_reg (vmode, vperm);
47348 target = d->target;
47349 if (d->vmode != vmode)
47350 target = gen_reg_rtx (vmode);
47351 op0 = gen_lowpart (vmode, d->op0);
47352 if (d->one_operand_p)
47354 if (vmode == V16QImode)
47355 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47356 else if (vmode == V32QImode)
47357 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47358 else if (vmode == V64QImode)
47359 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47360 else if (vmode == V8SFmode)
47361 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47362 else if (vmode == V8SImode)
47363 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47364 else if (vmode == V16SFmode)
47365 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47366 else if (vmode == V16SImode)
47367 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47368 else
47369 gcc_unreachable ();
47371 else
47373 op1 = gen_lowpart (vmode, d->op1);
47374 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47376 if (target != d->target)
47377 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47379 return true;
47382 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47383 in a single instruction. */
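/* The strategies are tried roughly from cheapest to most general: identity
   and broadcast permutations, a plain VEC_SELECT, interleave and shufps
   style SEL+CONCAT patterns (also with reversed operands), then blend,
   vpermil, pshufb/vpperm, palignr and finally the AVX-512 vpermi2 forms.  */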
47385 static bool
47386 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47388 unsigned i, nelt = d->nelt;
47389 unsigned char perm2[MAX_VECT_LEN];
47391 /* Check plain VEC_SELECT first, because AVX has instructions that could
47392 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47393 input where SEL+CONCAT may not. */
47394 if (d->one_operand_p)
47396 int mask = nelt - 1;
47397 bool identity_perm = true;
47398 bool broadcast_perm = true;
47400 for (i = 0; i < nelt; i++)
47402 perm2[i] = d->perm[i] & mask;
47403 if (perm2[i] != i)
47404 identity_perm = false;
47405 if (perm2[i])
47406 broadcast_perm = false;
47409 if (identity_perm)
47411 if (!d->testing_p)
47412 emit_move_insn (d->target, d->op0);
47413 return true;
47415 else if (broadcast_perm && TARGET_AVX2)
47417 /* Use vpbroadcast{b,w,d}. */
47418 rtx (*gen) (rtx, rtx) = NULL;
47419 switch (d->vmode)
47421 case V64QImode:
47422 if (TARGET_AVX512BW)
47423 gen = gen_avx512bw_vec_dupv64qi_1;
47424 break;
47425 case V32QImode:
47426 gen = gen_avx2_pbroadcastv32qi_1;
47427 break;
47428 case V32HImode:
47429 if (TARGET_AVX512BW)
47430 gen = gen_avx512bw_vec_dupv32hi_1;
47431 break;
47432 case V16HImode:
47433 gen = gen_avx2_pbroadcastv16hi_1;
47434 break;
47435 case V16SImode:
47436 if (TARGET_AVX512F)
47437 gen = gen_avx512f_vec_dupv16si_1;
47438 break;
47439 case V8SImode:
47440 gen = gen_avx2_pbroadcastv8si_1;
47441 break;
47442 case V16QImode:
47443 gen = gen_avx2_pbroadcastv16qi;
47444 break;
47445 case V8HImode:
47446 gen = gen_avx2_pbroadcastv8hi;
47447 break;
47448 case V16SFmode:
47449 if (TARGET_AVX512F)
47450 gen = gen_avx512f_vec_dupv16sf_1;
47451 break;
47452 case V8SFmode:
47453 gen = gen_avx2_vec_dupv8sf_1;
47454 break;
47455 case V8DFmode:
47456 if (TARGET_AVX512F)
47457 gen = gen_avx512f_vec_dupv8df_1;
47458 break;
47459 case V8DImode:
47460 if (TARGET_AVX512F)
47461 gen = gen_avx512f_vec_dupv8di_1;
47462 break;
47463 /* For other modes prefer other shuffles this function creates. */
47464 default: break;
47466 if (gen != NULL)
47468 if (!d->testing_p)
47469 emit_insn (gen (d->target, d->op0));
47470 return true;
47474 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47475 return true;
47477 /* There are plenty of patterns in sse.md that are written for
47478 SEL+CONCAT and are not replicated for a single op. Perhaps
47479 that should be changed, to avoid the nastiness here. */
47481 /* Recognize interleave style patterns, which means incrementing
47482 every other permutation operand. */
47483 for (i = 0; i < nelt; i += 2)
47485 perm2[i] = d->perm[i] & mask;
47486 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47488 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47489 d->testing_p))
47490 return true;
47492 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47493 if (nelt >= 4)
47495 for (i = 0; i < nelt; i += 4)
47497 perm2[i + 0] = d->perm[i + 0] & mask;
47498 perm2[i + 1] = d->perm[i + 1] & mask;
47499 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47500 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47503 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47504 d->testing_p))
47505 return true;
47509 /* Finally, try the fully general two operand permute. */
47510 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47511 d->testing_p))
47512 return true;
47514 /* Recognize interleave style patterns with reversed operands. */
47515 if (!d->one_operand_p)
47517 for (i = 0; i < nelt; ++i)
47519 unsigned e = d->perm[i];
47520 if (e >= nelt)
47521 e -= nelt;
47522 else
47523 e += nelt;
47524 perm2[i] = e;
47527 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47528 d->testing_p))
47529 return true;
47532 /* Try the SSE4.1 blend variable merge instructions. */
47533 if (expand_vec_perm_blend (d))
47534 return true;
47536 /* Try one of the AVX vpermil variable permutations. */
47537 if (expand_vec_perm_vpermil (d))
47538 return true;
47540 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47541 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47542 if (expand_vec_perm_pshufb (d))
47543 return true;
47545 /* Try the AVX2 vpalignr instruction. */
47546 if (expand_vec_perm_palignr (d, true))
47547 return true;
47549 /* Try the AVX512F vpermi2 instructions. */
47550 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47551 return true;
47553 return false;
47556 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47557 in terms of a pair of pshuflw + pshufhw instructions. */
47559 static bool
47560 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47562 unsigned char perm2[MAX_VECT_LEN];
47563 unsigned i;
47564 bool ok;
47566 if (d->vmode != V8HImode || !d->one_operand_p)
47567 return false;
47569 /* The two permutations only operate in 64-bit lanes. */
47570 for (i = 0; i < 4; ++i)
47571 if (d->perm[i] >= 4)
47572 return false;
47573 for (i = 4; i < 8; ++i)
47574 if (d->perm[i] < 4)
47575 return false;
47577 if (d->testing_p)
47578 return true;
47580 /* Emit the pshuflw. */
47581 memcpy (perm2, d->perm, 4);
47582 for (i = 4; i < 8; ++i)
47583 perm2[i] = i;
47584 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47585 gcc_assert (ok);
47587 /* Emit the pshufhw. */
47588 memcpy (perm2 + 4, d->perm + 4, 4);
47589 for (i = 0; i < 4; ++i)
47590 perm2[i] = i;
47591 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47592 gcc_assert (ok);
47594 return true;
47597 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47598 the permutation using the SSSE3 palignr instruction. This succeeds
47599 when all of the elements in PERM fit within one vector and we merely
47600 need to shift them down so that a single vector permutation has a
47601 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47602 the vpalignr instruction itself can perform the requested permutation. */
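/* MIN and MAX (and their operand-swapped counterparts) track the range of
   source elements referenced; shifting by MIN can only help when that range
   fits in a single vector (a single 128-bit lane for 32-byte modes) and MIN
   is not already zero.  */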
47604 static bool
47605 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47607 unsigned i, nelt = d->nelt;
47608 unsigned min, max, minswap, maxswap;
47609 bool in_order, ok, swap = false;
47610 rtx shift, target;
47611 struct expand_vec_perm_d dcopy;
47613 /* Even with AVX, palignr only operates on 128-bit vectors;
47614 with AVX2, palignr operates on both 128-bit lanes independently. */
47615 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47616 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47617 return false;
47619 min = 2 * nelt;
47620 max = 0;
47621 minswap = 2 * nelt;
47622 maxswap = 0;
47623 for (i = 0; i < nelt; ++i)
47625 unsigned e = d->perm[i];
47626 unsigned eswap = d->perm[i] ^ nelt;
47627 if (GET_MODE_SIZE (d->vmode) == 32)
47629 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47630 eswap = e ^ (nelt / 2);
47632 if (e < min)
47633 min = e;
47634 if (e > max)
47635 max = e;
47636 if (eswap < minswap)
47637 minswap = eswap;
47638 if (eswap > maxswap)
47639 maxswap = eswap;
47641 if (min == 0
47642 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47644 if (d->one_operand_p
47645 || minswap == 0
47646 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47647 ? nelt / 2 : nelt))
47648 return false;
47649 swap = true;
47650 min = minswap;
47651 max = maxswap;
47654 /* Given that we have SSSE3, we know we'll be able to implement the
47655 single operand permutation after the palignr with pshufb for
47656 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47657 first. */
47658 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47659 return true;
47661 dcopy = *d;
47662 if (swap)
47664 dcopy.op0 = d->op1;
47665 dcopy.op1 = d->op0;
47666 for (i = 0; i < nelt; ++i)
47667 dcopy.perm[i] ^= nelt;
47670 in_order = true;
47671 for (i = 0; i < nelt; ++i)
47673 unsigned e = dcopy.perm[i];
47674 if (GET_MODE_SIZE (d->vmode) == 32
47675 && e >= nelt
47676 && (e & (nelt / 2 - 1)) < min)
47677 e = e - min - (nelt / 2);
47678 else
47679 e = e - min;
47680 if (e != i)
47681 in_order = false;
47682 dcopy.perm[i] = e;
47684 dcopy.one_operand_p = true;
47686 if (single_insn_only_p && !in_order)
47687 return false;
47689 /* For AVX2, test whether we can permute the result in one instruction. */
47690 if (d->testing_p)
47692 if (in_order)
47693 return true;
47694 dcopy.op1 = dcopy.op0;
47695 return expand_vec_perm_1 (&dcopy);
47698 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47699 if (GET_MODE_SIZE (d->vmode) == 16)
47701 target = gen_reg_rtx (TImode);
47702 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47703 gen_lowpart (TImode, dcopy.op0), shift));
47705 else
47707 target = gen_reg_rtx (V2TImode);
47708 emit_insn (gen_avx2_palignrv2ti (target,
47709 gen_lowpart (V2TImode, dcopy.op1),
47710 gen_lowpart (V2TImode, dcopy.op0),
47711 shift));
47714 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47716 /* Test for the degenerate case where the alignment by itself
47717 produces the desired permutation. */
47718 if (in_order)
47720 emit_move_insn (d->target, dcopy.op0);
47721 return true;
47724 ok = expand_vec_perm_1 (&dcopy);
47725 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47727 return ok;
47730 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47731 the permutation using the SSE4.1 pblendv instruction. Potentially
47732 reduces the permutation from 2 pshufb and an or to 1 pshufb and a pblendv. */
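/* Elements that are already in place are left alone; the out-of-place
   elements must all come from the same operand, are first moved into
   position with a one-operand permutation of that operand, and the result
   is then blended with the other operand.  */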
47734 static bool
47735 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47737 unsigned i, which, nelt = d->nelt;
47738 struct expand_vec_perm_d dcopy, dcopy1;
47739 machine_mode vmode = d->vmode;
47740 bool ok;
47742 /* Use the same checks as in expand_vec_perm_blend. */
47743 if (d->one_operand_p)
47744 return false;
47745 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47747 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47749 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47751 else
47752 return false;
47754 /* Figure out which permutation elements do not stay in their
47755 respective lanes. */
47756 for (i = 0, which = 0; i < nelt; ++i)
47758 unsigned e = d->perm[i];
47759 if (e != i)
47760 which |= (e < nelt ? 1 : 2);
47762 /* We can pblend the part where elements do not stay in their
47763 respective lanes only when these elements all come from the same
47764 half of the permutation.
47765 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
47766 lanes, but both are >= 8.
47767 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
47768 respective lanes, and 8 >= 8 but 2 is not. */
47769 if (which != 1 && which != 2)
47770 return false;
47771 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47772 return true;
47774 /* First we apply one operand permutation to the part where
47775 elements stay not in their respective lanes. */
47776 dcopy = *d;
47777 if (which == 2)
47778 dcopy.op0 = dcopy.op1 = d->op1;
47779 else
47780 dcopy.op0 = dcopy.op1 = d->op0;
47781 if (!d->testing_p)
47782 dcopy.target = gen_reg_rtx (vmode);
47783 dcopy.one_operand_p = true;
47785 for (i = 0; i < nelt; ++i)
47786 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47788 ok = expand_vec_perm_1 (&dcopy);
47789 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47790 return false;
47791 else
47792 gcc_assert (ok);
47793 if (d->testing_p)
47794 return true;
47796 /* Next we put permuted elements into their positions. */
47797 dcopy1 = *d;
47798 if (which == 2)
47799 dcopy1.op1 = dcopy.target;
47800 else
47801 dcopy1.op0 = dcopy.target;
47803 for (i = 0; i < nelt; ++i)
47804 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47806 ok = expand_vec_perm_blend (&dcopy1);
47807 gcc_assert (ok);
47809 return true;
47812 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47814 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47815 a two vector permutation into a single vector permutation by using
47816 an interleave operation to merge the vectors. */
47818 static bool
47819 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47821 struct expand_vec_perm_d dremap, dfinal;
47822 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47823 unsigned HOST_WIDE_INT contents;
47824 unsigned char remap[2 * MAX_VECT_LEN];
47825 rtx_insn *seq;
47826 bool ok, same_halves = false;
47828 if (GET_MODE_SIZE (d->vmode) == 16)
47830 if (d->one_operand_p)
47831 return false;
47833 else if (GET_MODE_SIZE (d->vmode) == 32)
47835 if (!TARGET_AVX)
47836 return false;
47837 /* For 32-byte modes this is tried even for d->one_operand_p:
47838 the lack of cross-lane shuffling in some instructions
47839 might prevent a single insn shuffle. */
47840 dfinal = *d;
47841 dfinal.testing_p = true;
47842 /* If expand_vec_perm_interleave3 can expand this into
47843 a 3-insn sequence, give up and let it be expanded as
47844 a 3-insn sequence instead. While that is one insn longer,
47845 it doesn't need a memory operand, and in the common
47846 case where both the interleave-low and interleave-high
47847 permutations of the same operands are adjacent, it needs
47848 only 4 insns for both after CSE. */
47849 if (expand_vec_perm_interleave3 (&dfinal))
47850 return false;
47852 else
47853 return false;
47855 /* Examine from whence the elements come. */
47856 contents = 0;
47857 for (i = 0; i < nelt; ++i)
47858 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47860 memset (remap, 0xff, sizeof (remap));
47861 dremap = *d;
47863 if (GET_MODE_SIZE (d->vmode) == 16)
47865 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47867 /* Split the two input vectors into 4 halves. */
47868 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47869 h2 = h1 << nelt2;
47870 h3 = h2 << nelt2;
47871 h4 = h3 << nelt2;
47873 /* If the elements are all from the low halves, use interleave low, and
47874 similarly for interleave high. If the elements are from mismatched
47875 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
47876 if ((contents & (h1 | h3)) == contents)
47878 /* punpckl* */
47879 for (i = 0; i < nelt2; ++i)
47881 remap[i] = i * 2;
47882 remap[i + nelt] = i * 2 + 1;
47883 dremap.perm[i * 2] = i;
47884 dremap.perm[i * 2 + 1] = i + nelt;
47886 if (!TARGET_SSE2 && d->vmode == V4SImode)
47887 dremap.vmode = V4SFmode;
47889 else if ((contents & (h2 | h4)) == contents)
47891 /* punpckh* */
47892 for (i = 0; i < nelt2; ++i)
47894 remap[i + nelt2] = i * 2;
47895 remap[i + nelt + nelt2] = i * 2 + 1;
47896 dremap.perm[i * 2] = i + nelt2;
47897 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47899 if (!TARGET_SSE2 && d->vmode == V4SImode)
47900 dremap.vmode = V4SFmode;
47902 else if ((contents & (h1 | h4)) == contents)
47904 /* shufps */
47905 for (i = 0; i < nelt2; ++i)
47907 remap[i] = i;
47908 remap[i + nelt + nelt2] = i + nelt2;
47909 dremap.perm[i] = i;
47910 dremap.perm[i + nelt2] = i + nelt + nelt2;
47912 if (nelt != 4)
47914 /* shufpd */
47915 dremap.vmode = V2DImode;
47916 dremap.nelt = 2;
47917 dremap.perm[0] = 0;
47918 dremap.perm[1] = 3;
47921 else if ((contents & (h2 | h3)) == contents)
47923 /* shufps */
47924 for (i = 0; i < nelt2; ++i)
47926 remap[i + nelt2] = i;
47927 remap[i + nelt] = i + nelt2;
47928 dremap.perm[i] = i + nelt2;
47929 dremap.perm[i + nelt2] = i + nelt;
47931 if (nelt != 4)
47933 /* shufpd */
47934 dremap.vmode = V2DImode;
47935 dremap.nelt = 2;
47936 dremap.perm[0] = 1;
47937 dremap.perm[1] = 2;
47940 else
47941 return false;
47943 else
47945 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47946 unsigned HOST_WIDE_INT q[8];
47947 unsigned int nonzero_halves[4];
47949 /* Split the two input vectors into 8 quarters. */
47950 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47951 for (i = 1; i < 8; ++i)
47952 q[i] = q[0] << (nelt4 * i);
47953 for (i = 0; i < 4; ++i)
47954 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47956 nonzero_halves[nzcnt] = i;
47957 ++nzcnt;
47960 if (nzcnt == 1)
47962 gcc_assert (d->one_operand_p);
47963 nonzero_halves[1] = nonzero_halves[0];
47964 same_halves = true;
47966 else if (d->one_operand_p)
47968 gcc_assert (nonzero_halves[0] == 0);
47969 gcc_assert (nonzero_halves[1] == 1);
47972 if (nzcnt <= 2)
47974 if (d->perm[0] / nelt2 == nonzero_halves[1])
47976 /* Attempt to increase the likelihood that dfinal
47977 shuffle will be intra-lane. */
47978 char tmph = nonzero_halves[0];
47979 nonzero_halves[0] = nonzero_halves[1];
47980 nonzero_halves[1] = tmph;
47983 /* vperm2f128 or vperm2i128. */
47984 for (i = 0; i < nelt2; ++i)
47986 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47987 remap[i + nonzero_halves[0] * nelt2] = i;
47988 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47989 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
47992 if (d->vmode != V8SFmode
47993 && d->vmode != V4DFmode
47994 && d->vmode != V8SImode)
47996 dremap.vmode = V8SImode;
47997 dremap.nelt = 8;
47998 for (i = 0; i < 4; ++i)
48000 dremap.perm[i] = i + nonzero_halves[0] * 4;
48001 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48005 else if (d->one_operand_p)
48006 return false;
48007 else if (TARGET_AVX2
48008 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48010 /* vpunpckl* */
48011 for (i = 0; i < nelt4; ++i)
48013 remap[i] = i * 2;
48014 remap[i + nelt] = i * 2 + 1;
48015 remap[i + nelt2] = i * 2 + nelt2;
48016 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48017 dremap.perm[i * 2] = i;
48018 dremap.perm[i * 2 + 1] = i + nelt;
48019 dremap.perm[i * 2 + nelt2] = i + nelt2;
48020 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48023 else if (TARGET_AVX2
48024 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48026 /* vpunpckh* */
48027 for (i = 0; i < nelt4; ++i)
48029 remap[i + nelt4] = i * 2;
48030 remap[i + nelt + nelt4] = i * 2 + 1;
48031 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48032 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48033 dremap.perm[i * 2] = i + nelt4;
48034 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48035 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48036 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48039 else
48040 return false;
48043 /* Use the remapping array set up above to move the elements from their
48044 swizzled locations into their final destinations. */
48045 dfinal = *d;
48046 for (i = 0; i < nelt; ++i)
48048 unsigned e = remap[d->perm[i]];
48049 gcc_assert (e < nelt);
48050 /* If same_halves is true, both halves of the remapped vector are the
48051 same. Avoid cross-lane accesses if possible. */
48052 if (same_halves && i >= nelt2)
48054 gcc_assert (e < nelt2);
48055 dfinal.perm[i] = e + nelt2;
48057 else
48058 dfinal.perm[i] = e;
48060 if (!d->testing_p)
48062 dremap.target = gen_reg_rtx (dremap.vmode);
48063 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48065 dfinal.op1 = dfinal.op0;
48066 dfinal.one_operand_p = true;
48068 /* Test if the final remap can be done with a single insn. For V4SFmode or
48069 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48070 start_sequence ();
48071 ok = expand_vec_perm_1 (&dfinal);
48072 seq = get_insns ();
48073 end_sequence ();
48075 if (!ok)
48076 return false;
48078 if (d->testing_p)
48079 return true;
48081 if (dremap.vmode != dfinal.vmode)
48083 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48084 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48087 ok = expand_vec_perm_1 (&dremap);
48088 gcc_assert (ok);
48090 emit_insn (seq);
48091 return true;
48094 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48095 a single vector cross-lane permutation into vpermq followed
48096 by any of the single insn permutations. */
48098 static bool
48099 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48101 struct expand_vec_perm_d dremap, dfinal;
48102 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48103 unsigned contents[2];
48104 bool ok;
48106 if (!(TARGET_AVX2
48107 && (d->vmode == V32QImode || d->vmode == V16HImode)
48108 && d->one_operand_p))
48109 return false;
48111 contents[0] = 0;
48112 contents[1] = 0;
48113 for (i = 0; i < nelt2; ++i)
48115 contents[0] |= 1u << (d->perm[i] / nelt4);
48116 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48119 for (i = 0; i < 2; ++i)
48121 unsigned int cnt = 0;
48122 for (j = 0; j < 4; ++j)
48123 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48124 return false;
48127 if (d->testing_p)
48128 return true;
48130 dremap = *d;
48131 dremap.vmode = V4DImode;
48132 dremap.nelt = 4;
48133 dremap.target = gen_reg_rtx (V4DImode);
48134 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48135 dremap.op1 = dremap.op0;
48136 dremap.one_operand_p = true;
48137 for (i = 0; i < 2; ++i)
48139 unsigned int cnt = 0;
48140 for (j = 0; j < 4; ++j)
48141 if ((contents[i] & (1u << j)) != 0)
48142 dremap.perm[2 * i + cnt++] = j;
48143 for (; cnt < 2; ++cnt)
48144 dremap.perm[2 * i + cnt] = 0;
48147 dfinal = *d;
48148 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48149 dfinal.op1 = dfinal.op0;
48150 dfinal.one_operand_p = true;
48151 for (i = 0, j = 0; i < nelt; ++i)
48153 if (i == nelt2)
48154 j = 2;
48155 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48156 if ((d->perm[i] / nelt4) == dremap.perm[j])
48158 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48159 dfinal.perm[i] |= nelt4;
48160 else
48161 gcc_unreachable ();
48164 ok = expand_vec_perm_1 (&dremap);
48165 gcc_assert (ok);
48167 ok = expand_vec_perm_1 (&dfinal);
48168 gcc_assert (ok);
48170 return true;
48173 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48174 a vector permutation using two instructions: vperm2f128 or
48175 vperm2i128, followed by any single in-lane permutation. */
48177 static bool
48178 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48180 struct expand_vec_perm_d dfirst, dsecond;
48181 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48182 bool ok;
48184 if (!TARGET_AVX
48185 || GET_MODE_SIZE (d->vmode) != 32
48186 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48187 return false;
48189 dsecond = *d;
48190 dsecond.one_operand_p = false;
48191 dsecond.testing_p = true;
48193 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48194 immediate. For perm < 16 the second permutation uses
48195 d->op0 as first operand, for perm >= 16 it uses d->op1
48196 as first operand. The second operand is the result of
48197 vperm2[fi]128. */
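/* For example, perm == 0x9 (binary 1001) selects lane 1 for the low
   half and lane 2 for the high half of the vperm2[fi]128 result, giving
   the immediate ((0x9 << 2) | 0x9) & 0x33 == 0x21.  */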
48198 for (perm = 0; perm < 32; perm++)
48200 /* Ignore permutations which do not move anything cross-lane. */
48201 if (perm < 16)
48203 /* The second shuffle for e.g. V4DFmode has
48204 0123 and ABCD operands.
48205 Ignore AB23, as 23 is already in the second lane
48206 of the first operand. */
48207 if ((perm & 0xc) == (1 << 2)) continue;
48208 /* And 01CD, as 01 is in the first lane of the first
48209 operand. */
48210 if ((perm & 3) == 0) continue;
48211 /* And 4567, as then the vperm2[fi]128 doesn't change
48212 anything on the original 4567 second operand. */
48213 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48215 else
48217 /* The second shuffle for e.g. V4DFmode has
48218 4567 and ABCD operands.
48219 Ignore AB67, as 67 is already in the second lane
48220 of the first operand. */
48221 if ((perm & 0xc) == (3 << 2)) continue;
48222 /* And 45CD, as 45 is in the first lane of the first
48223 operand. */
48224 if ((perm & 3) == 2) continue;
48225 /* And 0123, as then the vperm2[fi]128 doesn't change
48226 anything on the original 0123 first operand. */
48227 if ((perm & 0xf) == (1 << 2)) continue;
48230 for (i = 0; i < nelt; i++)
48232 j = d->perm[i] / nelt2;
48233 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48234 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48235 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48236 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48237 else
48238 break;
48241 if (i == nelt)
48243 start_sequence ();
48244 ok = expand_vec_perm_1 (&dsecond);
48245 end_sequence ();
48247 else
48248 ok = false;
48250 if (ok)
48252 if (d->testing_p)
48253 return true;
48255 /* Found a usable second shuffle. dfirst will be
48256 vperm2f128 on d->op0 and d->op1. */
48257 dsecond.testing_p = false;
48258 dfirst = *d;
48259 dfirst.target = gen_reg_rtx (d->vmode);
48260 for (i = 0; i < nelt; i++)
48261 dfirst.perm[i] = (i & (nelt2 - 1))
48262 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48264 canonicalize_perm (&dfirst);
48265 ok = expand_vec_perm_1 (&dfirst);
48266 gcc_assert (ok);
48268 /* And dsecond is some single insn shuffle, taking
48269 d->op0 and result of vperm2f128 (if perm < 16) or
48270 d->op1 and result of vperm2f128 (otherwise). */
48271 if (perm >= 16)
48272 dsecond.op0 = dsecond.op1;
48273 dsecond.op1 = dfirst.target;
48275 ok = expand_vec_perm_1 (&dsecond);
48276 gcc_assert (ok);
48278 return true;
48281 /* For one operand, the only useful vperm2f128 permutation is 0x01
48282 aka lanes swap. */
48283 if (d->one_operand_p)
48284 return false;
48287 return false;
48290 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48291 a two vector permutation using 2 intra-lane interleave insns
48292 and cross-lane shuffle for 32-byte vectors. */
48294 static bool
48295 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48297 unsigned i, nelt;
48298 rtx (*gen) (rtx, rtx, rtx);
48300 if (d->one_operand_p)
48301 return false;
48302 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48304 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48306 else
48307 return false;
48309 nelt = d->nelt;
48310 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48311 return false;
48312 for (i = 0; i < nelt; i += 2)
48313 if (d->perm[i] != d->perm[0] + i / 2
48314 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48315 return false;
48317 if (d->testing_p)
48318 return true;
48320 switch (d->vmode)
48322 case V32QImode:
48323 if (d->perm[0])
48324 gen = gen_vec_interleave_highv32qi;
48325 else
48326 gen = gen_vec_interleave_lowv32qi;
48327 break;
48328 case V16HImode:
48329 if (d->perm[0])
48330 gen = gen_vec_interleave_highv16hi;
48331 else
48332 gen = gen_vec_interleave_lowv16hi;
48333 break;
48334 case V8SImode:
48335 if (d->perm[0])
48336 gen = gen_vec_interleave_highv8si;
48337 else
48338 gen = gen_vec_interleave_lowv8si;
48339 break;
48340 case V4DImode:
48341 if (d->perm[0])
48342 gen = gen_vec_interleave_highv4di;
48343 else
48344 gen = gen_vec_interleave_lowv4di;
48345 break;
48346 case V8SFmode:
48347 if (d->perm[0])
48348 gen = gen_vec_interleave_highv8sf;
48349 else
48350 gen = gen_vec_interleave_lowv8sf;
48351 break;
48352 case V4DFmode:
48353 if (d->perm[0])
48354 gen = gen_vec_interleave_highv4df;
48355 else
48356 gen = gen_vec_interleave_lowv4df;
48357 break;
48358 default:
48359 gcc_unreachable ();
48362 emit_insn (gen (d->target, d->op0, d->op1));
48363 return true;
48366 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48367 a single vector permutation using a single intra-lane vector
48368 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48369 the non-swapped and swapped vectors together. */
48371 static bool
48372 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48374 struct expand_vec_perm_d dfirst, dsecond;
48375 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48376 rtx_insn *seq;
48377 bool ok;
48378 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48380 if (!TARGET_AVX
48381 || TARGET_AVX2
48382 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48383 || !d->one_operand_p)
48384 return false;
48386 dfirst = *d;
48387 for (i = 0; i < nelt; i++)
48388 dfirst.perm[i] = 0xff;
48389 for (i = 0, msk = 0; i < nelt; i++)
48391 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48392 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48393 return false;
48394 dfirst.perm[j] = d->perm[i];
48395 if (j != i)
48396 msk |= (1 << i);
48398 for (i = 0; i < nelt; i++)
48399 if (dfirst.perm[i] == 0xff)
48400 dfirst.perm[i] = i;
48402 if (!d->testing_p)
48403 dfirst.target = gen_reg_rtx (dfirst.vmode);
48405 start_sequence ();
48406 ok = expand_vec_perm_1 (&dfirst);
48407 seq = get_insns ();
48408 end_sequence ();
48410 if (!ok)
48411 return false;
48413 if (d->testing_p)
48414 return true;
48416 emit_insn (seq);
48418 dsecond = *d;
48419 dsecond.op0 = dfirst.target;
48420 dsecond.op1 = dfirst.target;
48421 dsecond.one_operand_p = true;
48422 dsecond.target = gen_reg_rtx (dsecond.vmode);
48423 for (i = 0; i < nelt; i++)
48424 dsecond.perm[i] = i ^ nelt2;
48426 ok = expand_vec_perm_1 (&dsecond);
48427 gcc_assert (ok);
48429 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48430 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48431 return true;
48434 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48435 permutation using two vperm2f128, followed by a vshufpd insn blending
48436 the two vectors together. */
48438 static bool
48439 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48441 struct expand_vec_perm_d dfirst, dsecond, dthird;
48442 bool ok;
48444 if (!TARGET_AVX || (d->vmode != V4DFmode))
48445 return false;
48447 if (d->testing_p)
48448 return true;
48450 dfirst = *d;
48451 dsecond = *d;
48452 dthird = *d;
48454 dfirst.perm[0] = (d->perm[0] & ~1);
48455 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48456 dfirst.perm[2] = (d->perm[2] & ~1);
48457 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48458 dsecond.perm[0] = (d->perm[1] & ~1);
48459 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48460 dsecond.perm[2] = (d->perm[3] & ~1);
48461 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48462 dthird.perm[0] = (d->perm[0] % 2);
48463 dthird.perm[1] = (d->perm[1] % 2) + 4;
48464 dthird.perm[2] = (d->perm[2] % 2) + 2;
48465 dthird.perm[3] = (d->perm[3] % 2) + 6;
48467 dfirst.target = gen_reg_rtx (dfirst.vmode);
48468 dsecond.target = gen_reg_rtx (dsecond.vmode);
48469 dthird.op0 = dfirst.target;
48470 dthird.op1 = dsecond.target;
48471 dthird.one_operand_p = false;
48473 canonicalize_perm (&dfirst);
48474 canonicalize_perm (&dsecond);
48476 ok = expand_vec_perm_1 (&dfirst)
48477 && expand_vec_perm_1 (&dsecond)
48478 && expand_vec_perm_1 (&dthird);
48480 gcc_assert (ok);
48482 return true;
48485 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48486 permutation with two pshufb insns and an ior. We should have already
48487 failed all two instruction sequences. */
48489 static bool
48490 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48492 rtx rperm[2][16], vperm, l, h, op, m128;
48493 unsigned int i, nelt, eltsz;
48495 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48496 return false;
48497 gcc_assert (!d->one_operand_p);
48499 if (d->testing_p)
48500 return true;
48502 nelt = d->nelt;
48503 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48505 /* Generate two permutation masks. If the required element is within
48506 the given vector it is shuffled into the proper lane. If the required
48507 element is in the other vector, force a zero into the lane by setting
48508 bit 7 in the permutation mask. */
48509 m128 = GEN_INT (-128);
48510 for (i = 0; i < nelt; ++i)
48512 unsigned j, e = d->perm[i];
48513 unsigned which = (e >= nelt);
48514 if (e >= nelt)
48515 e -= nelt;
48517 for (j = 0; j < eltsz; ++j)
48519 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48520 rperm[1-which][i*eltsz + j] = m128;
48524 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48525 vperm = force_reg (V16QImode, vperm);
48527 l = gen_reg_rtx (V16QImode);
48528 op = gen_lowpart (V16QImode, d->op0);
48529 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48531 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48532 vperm = force_reg (V16QImode, vperm);
48534 h = gen_reg_rtx (V16QImode);
48535 op = gen_lowpart (V16QImode, d->op1);
48536 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48538 op = d->target;
48539 if (d->vmode != V16QImode)
48540 op = gen_reg_rtx (V16QImode);
48541 emit_insn (gen_iorv16qi3 (op, l, h));
48542 if (op != d->target)
48543 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48545 return true;
48548 /* Implement arbitrary permutation of a single V32QImode or V16HImode operand
48549 with two vpshufb insns, vpermq and vpor. We should have already failed
48550 all two or three instruction sequences. */
48552 static bool
48553 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48555 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48556 unsigned int i, nelt, eltsz;
48558 if (!TARGET_AVX2
48559 || !d->one_operand_p
48560 || (d->vmode != V32QImode && d->vmode != V16HImode))
48561 return false;
48563 if (d->testing_p)
48564 return true;
48566 nelt = d->nelt;
48567 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48569 /* Generate two permutation masks. If the required element is within
48570 the same lane, it is shuffled in. If the required element is from the
48571 other lane, force a zero by setting bit 7 in the permutation mask.
48572 The other mask has non-negative elements wherever an element is
48573 requested from the other lane; such an element is also moved to the other lane,
48574 so that the result of vpshufb can have the two V2TImode halves
48575 swapped. */
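/* For example, for V32QImode with d->perm[0] == 20 the wanted byte lives
   in the other 128-bit lane, so e == 4 and the cross-lane mask places
   index 4 at byte 16: vpshufb then puts op0's byte 20 at position 16 of
   H, the vpermq lane swap moves it to position 0 of HP, the in-lane mask
   forces position 0 of L to zero, and the final ior yields byte 20 at
   position 0 as required.  */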
48576 m128 = GEN_INT (-128);
48577 for (i = 0; i < nelt; ++i)
48579 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48580 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48582 for (j = 0; j < eltsz; ++j)
48584 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48585 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48589 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48590 vperm = force_reg (V32QImode, vperm);
48592 h = gen_reg_rtx (V32QImode);
48593 op = gen_lowpart (V32QImode, d->op0);
48594 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48596 /* Swap the 128-bit lanes of h into hp. */
48597 hp = gen_reg_rtx (V4DImode);
48598 op = gen_lowpart (V4DImode, h);
48599 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48600 const1_rtx));
48602 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48603 vperm = force_reg (V32QImode, vperm);
48605 l = gen_reg_rtx (V32QImode);
48606 op = gen_lowpart (V32QImode, d->op0);
48607 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48609 op = d->target;
48610 if (d->vmode != V32QImode)
48611 op = gen_reg_rtx (V32QImode);
48612 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48613 if (op != d->target)
48614 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48616 return true;
48619 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48620 and extract-odd permutations of two V32QImode or V16HImode operands
48621 with two vpshufb insns, vpor and vpermq. We should have already
48622 failed all two or three instruction sequences. */
48624 static bool
48625 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48627 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48628 unsigned int i, nelt, eltsz;
48630 if (!TARGET_AVX2
48631 || d->one_operand_p
48632 || (d->vmode != V32QImode && d->vmode != V16HImode))
48633 return false;
48635 for (i = 0; i < d->nelt; ++i)
48636 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48637 return false;
48639 if (d->testing_p)
48640 return true;
48642 nelt = d->nelt;
48643 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48645 /* Generate two permutation masks. In the first permutation mask
48646 the first quarter will contain indexes for the first half
48647 of the op0, the second quarter will contain bit 7 set, third quarter
48648 will contain indexes for the second half of the op0 and the
48649 last quarter bit 7 set. In the second permutation mask
48650 the first quarter will contain bit 7 set, the second quarter
48651 indexes for the first half of the op1, the third quarter bit 7 set
48652 and last quarter indexes for the second half of the op1.
48653 I.e. the first mask e.g. for V32QImode extract even will be:
48654 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48655 (all values masked with 0xf except for -128) and second mask
48656 for extract even will be
48657 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48658 m128 = GEN_INT (-128);
48659 for (i = 0; i < nelt; ++i)
48661 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48662 unsigned which = d->perm[i] >= nelt;
48663 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48665 for (j = 0; j < eltsz; ++j)
48667 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48668 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48672 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48673 vperm = force_reg (V32QImode, vperm);
48675 l = gen_reg_rtx (V32QImode);
48676 op = gen_lowpart (V32QImode, d->op0);
48677 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48679 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48680 vperm = force_reg (V32QImode, vperm);
48682 h = gen_reg_rtx (V32QImode);
48683 op = gen_lowpart (V32QImode, d->op1);
48684 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48686 ior = gen_reg_rtx (V32QImode);
48687 emit_insn (gen_iorv32qi3 (ior, l, h));
48689 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48690 op = gen_reg_rtx (V4DImode);
48691 ior = gen_lowpart (V4DImode, ior);
48692 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48693 const1_rtx, GEN_INT (3)));
48694 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48696 return true;
48699 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48700 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48701 with two "and" and "pack" or two "shift" and "pack" insns. We should
48702 have already failed all two instruction sequences. */
48704 static bool
48705 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48707 rtx op, dop0, dop1, t, rperm[16];
48708 unsigned i, odd, c, s, nelt = d->nelt;
48709 bool end_perm = false;
48710 machine_mode half_mode;
48711 rtx (*gen_and) (rtx, rtx, rtx);
48712 rtx (*gen_pack) (rtx, rtx, rtx);
48713 rtx (*gen_shift) (rtx, rtx, rtx);
48715 if (d->one_operand_p)
48716 return false;
48718 switch (d->vmode)
48720 case V8HImode:
48721 /* Required for "pack". */
48722 if (!TARGET_SSE4_1)
48723 return false;
48724 c = 0xffff;
48725 s = 16;
48726 half_mode = V4SImode;
48727 gen_and = gen_andv4si3;
48728 gen_pack = gen_sse4_1_packusdw;
48729 gen_shift = gen_lshrv4si3;
48730 break;
48731 case V16QImode:
48732 /* No check as all instructions are SSE2. */
48733 c = 0xff;
48734 s = 8;
48735 half_mode = V8HImode;
48736 gen_and = gen_andv8hi3;
48737 gen_pack = gen_sse2_packuswb;
48738 gen_shift = gen_lshrv8hi3;
48739 break;
48740 case V16HImode:
48741 if (!TARGET_AVX2)
48742 return false;
48743 c = 0xffff;
48744 s = 16;
48745 half_mode = V8SImode;
48746 gen_and = gen_andv8si3;
48747 gen_pack = gen_avx2_packusdw;
48748 gen_shift = gen_lshrv8si3;
48749 end_perm = true;
48750 break;
48751 case V32QImode:
48752 if (!TARGET_AVX2)
48753 return false;
48754 c = 0xff;
48755 s = 8;
48756 half_mode = V16HImode;
48757 gen_and = gen_andv16hi3;
48758 gen_pack = gen_avx2_packuswb;
48759 gen_shift = gen_lshrv16hi3;
48760 end_perm = true;
48761 break;
48762 default:
48763 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48764 general shuffles. */
48765 return false;
48768 /* Check that permutation is even or odd. */
48769 odd = d->perm[0];
48770 if (odd > 1)
48771 return false;
48773 for (i = 1; i < nelt; ++i)
48774 if (d->perm[i] != 2 * i + odd)
48775 return false;
48777 if (d->testing_p)
48778 return true;
48780 dop0 = gen_reg_rtx (half_mode);
48781 dop1 = gen_reg_rtx (half_mode);
48782 if (odd == 0)
48784 for (i = 0; i < nelt / 2; i++)
48785 rperm[i] = GEN_INT (c);
48786 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48787 t = force_reg (half_mode, t);
48788 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48789 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
48791 else
48793 emit_insn (gen_shift (dop0,
48794 gen_lowpart (half_mode, d->op0),
48795 GEN_INT (s)));
48796 emit_insn (gen_shift (dop1,
48797 gen_lowpart (half_mode, d->op1),
48798 GEN_INT (s)));
48800 /* In the AVX2 256-bit case we need to permute the pack result. */
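/* The pack insns operate within 128-bit lanes, so the 64-bit quarters of
   the packed result are { dop0.lane0, dop1.lane0, dop0.lane1, dop1.lane1 };
   the vpermq with selector { 0, 2, 1, 3 } below reorders them into
   { dop0.lane0, dop0.lane1, dop1.lane0, dop1.lane1 }.  */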
48801 if (TARGET_AVX2 && end_perm)
48803 op = gen_reg_rtx (d->vmode);
48804 t = gen_reg_rtx (V4DImode);
48805 emit_insn (gen_pack (op, dop0, dop1));
48806 emit_insn (gen_avx2_permv4di_1 (t,
48807 gen_lowpart (V4DImode, op),
48808 const0_rtx,
48809 const2_rtx,
48810 const1_rtx,
48811 GEN_INT (3)));
48812 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48814 else
48815 emit_insn (gen_pack (d->target, dop0, dop1));
48817 return true;
48820 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48821 and extract-odd permutations. */
48823 static bool
48824 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48826 rtx t1, t2, t3, t4, t5;
48828 switch (d->vmode)
48830 case V4DFmode:
48831 if (d->testing_p)
48832 break;
48833 t1 = gen_reg_rtx (V4DFmode);
48834 t2 = gen_reg_rtx (V4DFmode);
48836 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48837 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48838 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48840 /* Now an unpck[lh]pd will produce the result required. */
48841 if (odd)
48842 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48843 else
48844 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
48845 emit_insn (t3);
48846 break;
48848 case V8SFmode:
48850 int mask = odd ? 0xdd : 0x88;
48852 if (d->testing_p)
48853 break;
48854 t1 = gen_reg_rtx (V8SFmode);
48855 t2 = gen_reg_rtx (V8SFmode);
48856 t3 = gen_reg_rtx (V8SFmode);
48858 /* Shuffle within the 128-bit lanes to produce:
48859 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48860 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48861 GEN_INT (mask)));
48863 /* Shuffle the lanes around to produce:
48864 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48865 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48866 GEN_INT (0x3)));
48868 /* Shuffle within the 128-bit lanes to produce:
48869 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48870 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48872 /* Shuffle within the 128-bit lanes to produce:
48873 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48874 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48876 /* Shuffle the lanes around to produce:
48877 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48878 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48879 GEN_INT (0x20)));
48881 break;
48883 case V2DFmode:
48884 case V4SFmode:
48885 case V2DImode:
48886 case V4SImode:
48887 /* These are always directly implementable by expand_vec_perm_1. */
48888 gcc_unreachable ();
48890 case V8HImode:
48891 if (TARGET_SSE4_1)
48892 return expand_vec_perm_even_odd_pack (d);
48893 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48894 return expand_vec_perm_pshufb2 (d);
48895 else
48897 if (d->testing_p)
48898 break;
48899 /* We need 2*log2(N)-1 operations to achieve odd/even
48900 with interleave. */
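/* For example, extracting the even elements of { a0 ... a7 } and
   { b0 ... b7 }:
     t1     = high interleave (op0, op1)    = { a4 b4 a5 b5 a6 b6 a7 b7 }
     target = low interleave (op0, op1)     = { a0 b0 a1 b1 a2 b2 a3 b3 }
     t2     = high interleave (target, t1)  = { a2 a6 b2 b6 a3 a7 b3 b7 }
     target = low interleave (target, t1)   = { a0 a4 b0 b4 a1 a5 b1 b5 }
     result = low interleave (target, t2)   = { a0 a2 a4 a6 b0 b2 b4 b6 }.  */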
48901 t1 = gen_reg_rtx (V8HImode);
48902 t2 = gen_reg_rtx (V8HImode);
48903 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48904 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48905 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48906 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48907 if (odd)
48908 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48909 else
48910 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48911 emit_insn (t3);
48913 break;
48915 case V16QImode:
48916 return expand_vec_perm_even_odd_pack (d);
48918 case V16HImode:
48919 case V32QImode:
48920 return expand_vec_perm_even_odd_pack (d);
48922 case V4DImode:
48923 if (!TARGET_AVX2)
48925 struct expand_vec_perm_d d_copy = *d;
48926 d_copy.vmode = V4DFmode;
48927 if (d->testing_p)
48928 d_copy.target = gen_lowpart (V4DFmode, d->target);
48929 else
48930 d_copy.target = gen_reg_rtx (V4DFmode);
48931 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48932 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48933 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48935 if (!d->testing_p)
48936 emit_move_insn (d->target,
48937 gen_lowpart (V4DImode, d_copy.target));
48938 return true;
48940 return false;
48943 if (d->testing_p)
48944 break;
48946 t1 = gen_reg_rtx (V4DImode);
48947 t2 = gen_reg_rtx (V4DImode);
48949 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48950 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48951 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48953 /* Now a vpunpck[lh]qdq will produce the result required. */
48954 if (odd)
48955 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48956 else
48957 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
48958 emit_insn (t3);
48959 break;
48961 case V8SImode:
48962 if (!TARGET_AVX2)
48964 struct expand_vec_perm_d d_copy = *d;
48965 d_copy.vmode = V8SFmode;
48966 if (d->testing_p)
48967 d_copy.target = gen_lowpart (V8SFmode, d->target);
48968 else
48969 d_copy.target = gen_reg_rtx (V8SFmode);
48970 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48971 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48972 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48974 if (!d->testing_p)
48975 emit_move_insn (d->target,
48976 gen_lowpart (V8SImode, d_copy.target));
48977 return true;
48979 return false;
48982 if (d->testing_p)
48983 break;
48985 t1 = gen_reg_rtx (V8SImode);
48986 t2 = gen_reg_rtx (V8SImode);
48987 t3 = gen_reg_rtx (V4DImode);
48988 t4 = gen_reg_rtx (V4DImode);
48989 t5 = gen_reg_rtx (V4DImode);
48991 /* Shuffle the lanes around into
48992 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48993 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48994 gen_lowpart (V4DImode, d->op1),
48995 GEN_INT (0x20)));
48996 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48997 gen_lowpart (V4DImode, d->op1),
48998 GEN_INT (0x31)));
49000 /* Swap the 2nd and 3rd position in each lane into
49001 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49002 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49003 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49004 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49005 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49007 /* Now a vpunpck[lh]qdq will produce
49008 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49009 if (odd)
49010 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49011 gen_lowpart (V4DImode, t2));
49012 else
49013 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49014 gen_lowpart (V4DImode, t2));
49015 emit_insn (t3);
49016 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49017 break;
49019 default:
49020 gcc_unreachable ();
49023 return true;
49026 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49027 extract-even and extract-odd permutations. */
49029 static bool
49030 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49032 unsigned i, odd, nelt = d->nelt;
49034 odd = d->perm[0];
49035 if (odd != 0 && odd != 1)
49036 return false;
49038 for (i = 1; i < nelt; ++i)
49039 if (d->perm[i] != 2 * i + odd)
49040 return false;
49042 return expand_vec_perm_even_odd_1 (d, odd);
49045 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
49046 permutations. We assume that expand_vec_perm_1 has already failed. */
49048 static bool
49049 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49051 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49052 machine_mode vmode = d->vmode;
49053 unsigned char perm2[4];
49054 rtx op0 = d->op0, dest;
49055 bool ok;
49057 switch (vmode)
49059 case V4DFmode:
49060 case V8SFmode:
49061 /* These are special-cased in sse.md so that we can optionally
49062 use the vbroadcast instruction. They expand to two insns
49063 if the input happens to be in a register. */
49064 gcc_unreachable ();
49066 case V2DFmode:
49067 case V2DImode:
49068 case V4SFmode:
49069 case V4SImode:
49070 /* These are always implementable using standard shuffle patterns. */
49071 gcc_unreachable ();
49073 case V8HImode:
49074 case V16QImode:
49075 /* These can be implemented via interleave. We save one insn by
49076 stopping once we have promoted to V4SImode and then use pshufd. */
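/* For example, broadcasting byte 5 of a V16QImode vector: a low
   interleave duplicates it into both bytes of V8HImode element 5, a
   high interleave then duplicates that word into both halves of
   V4SImode element 1, and the final pshufd with { 1, 1, 1, 1 }
   broadcasts that dword.  */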
49077 if (d->testing_p)
49078 return true;
49081 rtx dest;
49082 rtx (*gen) (rtx, rtx, rtx)
49083 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49084 : gen_vec_interleave_lowv8hi;
49086 if (elt >= nelt2)
49088 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49089 : gen_vec_interleave_highv8hi;
49090 elt -= nelt2;
49092 nelt2 /= 2;
49094 dest = gen_reg_rtx (vmode);
49095 emit_insn (gen (dest, op0, op0));
49096 vmode = get_mode_wider_vector (vmode);
49097 op0 = gen_lowpart (vmode, dest);
49099 while (vmode != V4SImode);
49101 memset (perm2, elt, 4);
49102 dest = gen_reg_rtx (V4SImode);
49103 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49104 gcc_assert (ok);
49105 if (!d->testing_p)
49106 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49107 return true;
49109 case V64QImode:
49110 case V32QImode:
49111 case V16HImode:
49112 case V8SImode:
49113 case V4DImode:
49114 /* For AVX2 broadcasts of the first element vpbroadcast* or
49115 vpermq should be used by expand_vec_perm_1. */
49116 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49117 return false;
49119 default:
49120 gcc_unreachable ();
49124 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49125 broadcast permutations. */
49127 static bool
49128 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49130 unsigned i, elt, nelt = d->nelt;
49132 if (!d->one_operand_p)
49133 return false;
49135 elt = d->perm[0];
49136 for (i = 1; i < nelt; ++i)
49137 if (d->perm[i] != elt)
49138 return false;
49140 return expand_vec_perm_broadcast_1 (d);
49143 /* Implement arbitrary permutations of two V64QImode operands
49144 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49145 static bool
49146 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49148 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49149 return false;
49151 if (d->testing_p)
49152 return true;
49154 struct expand_vec_perm_d ds[2];
49155 rtx rperm[128], vperm, target0, target1;
49156 unsigned int i, nelt;
49157 machine_mode vmode;
49159 nelt = d->nelt;
49160 vmode = V64QImode;
49162 for (i = 0; i < 2; i++)
49164 ds[i] = *d;
49165 ds[i].vmode = V32HImode;
49166 ds[i].nelt = 32;
49167 ds[i].target = gen_reg_rtx (V32HImode);
49168 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49169 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49172 /* Prepare permutations such that the first one takes care of
49173 putting the even bytes into the right positions or one position
49174 higher (ds[0]) and the second one takes care of
49175 putting the odd bytes into the right positions or one position lower
49176 (ds[1]). */
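/* For example, if d->perm[0] == 13, then ds[0].perm[0] == 6, so the word
   shuffle brings word 6 of the inputs (bytes 12 and 13) to word 0 of
   ds[0].target; the vpshufb mask entry (0 & 14) + (13 & 1) == 1 then
   selects the original byte 13 for byte position 0, while the odd byte
   positions of this mask are -1 so they are zeroed and filled in from
   the other half by the final vpor.  */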
49178 for (i = 0; i < nelt; i++)
49180 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49181 if (i & 1)
49183 rperm[i] = constm1_rtx;
49184 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49186 else
49188 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49189 rperm[i + 64] = constm1_rtx;
49193 bool ok = expand_vec_perm_1 (&ds[0]);
49194 gcc_assert (ok);
49195 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49197 ok = expand_vec_perm_1 (&ds[1]);
49198 gcc_assert (ok);
49199 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49201 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49202 vperm = force_reg (vmode, vperm);
49203 target0 = gen_reg_rtx (V64QImode);
49204 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49206 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49207 vperm = force_reg (vmode, vperm);
49208 target1 = gen_reg_rtx (V64QImode);
49209 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49211 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49212 return true;
49215 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49216 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49217 all the shorter instruction sequences. */
49219 static bool
49220 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49222 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49223 unsigned int i, nelt, eltsz;
49224 bool used[4];
49226 if (!TARGET_AVX2
49227 || d->one_operand_p
49228 || (d->vmode != V32QImode && d->vmode != V16HImode))
49229 return false;
49231 if (d->testing_p)
49232 return true;
49234 nelt = d->nelt;
49235 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49237 /* Generate 4 permutation masks. If the required element is within
49238 the same lane, it is shuffled in. If the required element is from the
49239 other lane, force a zero by setting bit 7 in the permutation mask.
49240 In the other mask the elements are non-negative wherever an element
49241 is requested from the other lane, but also moved to the other lane,
49242 so that the result of vpshufb can have the two V2TImode halves
49243 swapped. */
49244 m128 = GEN_INT (-128);
49245 for (i = 0; i < 32; ++i)
49247 rperm[0][i] = m128;
49248 rperm[1][i] = m128;
49249 rperm[2][i] = m128;
49250 rperm[3][i] = m128;
49252 used[0] = false;
49253 used[1] = false;
49254 used[2] = false;
49255 used[3] = false;
49256 for (i = 0; i < nelt; ++i)
49258 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49259 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49260 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49262 for (j = 0; j < eltsz; ++j)
49263 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49264 used[which] = true;
49267 for (i = 0; i < 2; ++i)
49269 if (!used[2 * i + 1])
49271 h[i] = NULL_RTX;
49272 continue;
49274 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49275 gen_rtvec_v (32, rperm[2 * i + 1]));
49276 vperm = force_reg (V32QImode, vperm);
49277 h[i] = gen_reg_rtx (V32QImode);
49278 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49279 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49282 /* Swap the 128-bit lanes of h[X]. */
49283 for (i = 0; i < 2; ++i)
49285 if (h[i] == NULL_RTX)
49286 continue;
49287 op = gen_reg_rtx (V4DImode);
49288 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49289 const2_rtx, GEN_INT (3), const0_rtx,
49290 const1_rtx));
49291 h[i] = gen_lowpart (V32QImode, op);
49294 for (i = 0; i < 2; ++i)
49296 if (!used[2 * i])
49298 l[i] = NULL_RTX;
49299 continue;
49301 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49302 vperm = force_reg (V32QImode, vperm);
49303 l[i] = gen_reg_rtx (V32QImode);
49304 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49305 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49308 for (i = 0; i < 2; ++i)
49310 if (h[i] && l[i])
49312 op = gen_reg_rtx (V32QImode);
49313 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49314 l[i] = op;
49316 else if (h[i])
49317 l[i] = h[i];
49320 gcc_assert (l[0] && l[1]);
49321 op = d->target;
49322 if (d->vmode != V32QImode)
49323 op = gen_reg_rtx (V32QImode);
49324 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49325 if (op != d->target)
49326 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49327 return true;
49330 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49331 With all of the interface bits taken care of, perform the expansion
49332 in D and return true on success. */
49334 static bool
49335 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49337 /* Try a single instruction expansion. */
49338 if (expand_vec_perm_1 (d))
49339 return true;
49341 /* Try sequences of two instructions. */
49343 if (expand_vec_perm_pshuflw_pshufhw (d))
49344 return true;
49346 if (expand_vec_perm_palignr (d, false))
49347 return true;
49349 if (expand_vec_perm_interleave2 (d))
49350 return true;
49352 if (expand_vec_perm_broadcast (d))
49353 return true;
49355 if (expand_vec_perm_vpermq_perm_1 (d))
49356 return true;
49358 if (expand_vec_perm_vperm2f128 (d))
49359 return true;
49361 if (expand_vec_perm_pblendv (d))
49362 return true;
49364 /* Try sequences of three instructions. */
49366 if (expand_vec_perm_even_odd_pack (d))
49367 return true;
49369 if (expand_vec_perm_2vperm2f128_vshuf (d))
49370 return true;
49372 if (expand_vec_perm_pshufb2 (d))
49373 return true;
49375 if (expand_vec_perm_interleave3 (d))
49376 return true;
49378 if (expand_vec_perm_vperm2f128_vblend (d))
49379 return true;
49381 /* Try sequences of four instructions. */
49383 if (expand_vec_perm_vpshufb2_vpermq (d))
49384 return true;
49386 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49387 return true;
49389 if (expand_vec_perm_vpermi2_vpshub2 (d))
49390 return true;
49392 /* ??? Look for narrow permutations whose element orderings would
49393 allow the promotion to a wider mode. */
49395 /* ??? Look for sequences of interleave or a wider permute that place
49396 the data into the correct lanes for a half-vector shuffle like
49397 pshuf[lh]w or vpermilps. */
49399 /* ??? Look for sequences of interleave that produce the desired results.
49400 The combinatorics of punpck[lh] get pretty ugly... */
49402 if (expand_vec_perm_even_odd (d))
49403 return true;
49405 /* Even longer sequences. */
49406 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49407 return true;
49409 return false;
49412 /* If a permutation only uses one operand, make it clear. Returns true
49413 if the permutation references both operands. */
49415 static bool
49416 canonicalize_perm (struct expand_vec_perm_d *d)
49418 int i, which, nelt = d->nelt;
49420 for (i = which = 0; i < nelt; ++i)
49421 which |= (d->perm[i] < nelt ? 1 : 2);
49423 d->one_operand_p = true;
49424 switch (which)
49426 default:
49427 gcc_unreachable();
49429 case 3:
49430 if (!rtx_equal_p (d->op0, d->op1))
49432 d->one_operand_p = false;
49433 break;
49435 /* The elements of PERM do not suggest that only the first operand
49436 is used, but both operands are identical. Allow easier matching
49437 of the permutation by folding the permutation into the single
49438 input vector. */
49439 /* FALLTHRU */
49441 case 2:
49442 for (i = 0; i < nelt; ++i)
49443 d->perm[i] &= nelt - 1;
49444 d->op0 = d->op1;
49445 break;
49447 case 1:
49448 d->op1 = d->op0;
49449 break;
49452 return (which == 3);
49455 bool
49456 ix86_expand_vec_perm_const (rtx operands[4])
49458 struct expand_vec_perm_d d;
49459 unsigned char perm[MAX_VECT_LEN];
49460 int i, nelt;
49461 bool two_args;
49462 rtx sel;
49464 d.target = operands[0];
49465 d.op0 = operands[1];
49466 d.op1 = operands[2];
49467 sel = operands[3];
49469 d.vmode = GET_MODE (d.target);
49470 gcc_assert (VECTOR_MODE_P (d.vmode));
49471 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49472 d.testing_p = false;
49474 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49475 gcc_assert (XVECLEN (sel, 0) == nelt);
49476 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49478 for (i = 0; i < nelt; ++i)
49480 rtx e = XVECEXP (sel, 0, i);
49481 int ei = INTVAL (e) & (2 * nelt - 1);
49482 d.perm[i] = ei;
49483 perm[i] = ei;
49486 two_args = canonicalize_perm (&d);
49488 if (ix86_expand_vec_perm_const_1 (&d))
49489 return true;
49491 /* If the selector says both arguments are needed, but the operands are the
49492 same, the above tried to expand with one_operand_p and flattened selector.
49493 If that didn't work, retry without one_operand_p; we succeeded with that
49494 during testing. */
49495 if (two_args && d.one_operand_p)
49497 d.one_operand_p = false;
49498 memcpy (d.perm, perm, sizeof (perm));
49499 return ix86_expand_vec_perm_const_1 (&d);
49502 return false;
49505 /* Implement targetm.vectorize.vec_perm_const_ok. */
49507 static bool
49508 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49509 const unsigned char *sel)
49511 struct expand_vec_perm_d d;
49512 unsigned int i, nelt, which;
49513 bool ret;
49515 d.vmode = vmode;
49516 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49517 d.testing_p = true;
49519 /* Given sufficient ISA support we can just return true here
49520 for selected vector modes. */
49521 switch (d.vmode)
49523 case V16SFmode:
49524 case V16SImode:
49525 case V8DImode:
49526 case V8DFmode:
49527 if (TARGET_AVX512F)
49528 /* All implementable with a single vpermi2 insn. */
49529 return true;
49530 break;
49531 case V32HImode:
49532 if (TARGET_AVX512BW)
49533 /* All implementable with a single vpermi2 insn. */
49534 return true;
49535 break;
49536 case V64QImode:
49537 if (TARGET_AVX512BW)
49538 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49539 return true;
49540 break;
49541 case V8SImode:
49542 case V8SFmode:
49543 case V4DFmode:
49544 case V4DImode:
49545 if (TARGET_AVX512VL)
49546 /* All implementable with a single vpermi2 insn. */
49547 return true;
49548 break;
49549 case V16HImode:
49550 if (TARGET_AVX2)
49551 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49552 return true;
49553 break;
49554 case V32QImode:
49555 if (TARGET_AVX2)
49556 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49557 return true;
49558 break;
49559 case V4SImode:
49560 case V4SFmode:
49561 case V8HImode:
49562 case V16QImode:
49563 /* All implementable with a single vpperm insn. */
49564 if (TARGET_XOP)
49565 return true;
49566 /* All implementable with 2 pshufb + 1 ior. */
49567 if (TARGET_SSSE3)
49568 return true;
49569 break;
49570 case V2DImode:
49571 case V2DFmode:
49572 /* All implementable with shufpd or unpck[lh]pd. */
49573 return true;
49574 default:
49575 return false;
49578 /* Copy the values from the selector SEL into the permutation
49579 array in D. */
49580 memcpy (d.perm, sel, nelt);
49581 for (i = which = 0; i < nelt; ++i)
49583 unsigned char e = d.perm[i];
49584 gcc_assert (e < 2 * nelt);
49585 which |= (e < nelt ? 1 : 2);
49588 /* For all elements from second vector, fold the elements to first. */
49589 if (which == 2)
49590 for (i = 0; i < nelt; ++i)
49591 d.perm[i] -= nelt;
49593 /* Check whether the mask can be applied to the vector type. */
49594 d.one_operand_p = (which != 3);
49596 /* Implementable with shufps or pshufd. */
49597 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49598 return true;
49600 /* Otherwise we have to go through the motions and see if we can
49601 figure out how to generate the requested permutation. */
49602 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49603 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49604 if (!d.one_operand_p)
49605 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49607 start_sequence ();
49608 ret = ix86_expand_vec_perm_const_1 (&d);
49609 end_sequence ();
49611 return ret;
49614 void
49615 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49617 struct expand_vec_perm_d d;
49618 unsigned i, nelt;
49620 d.target = targ;
49621 d.op0 = op0;
49622 d.op1 = op1;
49623 d.vmode = GET_MODE (targ);
49624 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49625 d.one_operand_p = false;
49626 d.testing_p = false;
49628 for (i = 0; i < nelt; ++i)
49629 d.perm[i] = i * 2 + odd;
49631 /* We'll either be able to implement the permutation directly... */
49632 if (expand_vec_perm_1 (&d))
49633 return;
49635 /* ... or we use the special-case patterns. */
49636 expand_vec_perm_even_odd_1 (&d, odd);
49639 static void
49640 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49642 struct expand_vec_perm_d d;
49643 unsigned i, nelt, base;
49644 bool ok;
49646 d.target = targ;
49647 d.op0 = op0;
49648 d.op1 = op1;
49649 d.vmode = GET_MODE (targ);
49650 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49651 d.one_operand_p = false;
49652 d.testing_p = false;
49654 base = high_p ? nelt / 2 : 0;
49655 for (i = 0; i < nelt / 2; ++i)
49657 d.perm[i * 2] = i + base;
49658 d.perm[i * 2 + 1] = i + base + nelt;
49661 /* Note that for AVX this isn't one instruction. */
49662 ok = ix86_expand_vec_perm_const_1 (&d);
49663 gcc_assert (ok);
49667 /* Expand a vector operation CODE for a V*QImode in terms of the
49668 same operation on V*HImode. */
49670 void
49671 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49673 machine_mode qimode = GET_MODE (dest);
49674 machine_mode himode;
49675 rtx (*gen_il) (rtx, rtx, rtx);
49676 rtx (*gen_ih) (rtx, rtx, rtx);
49677 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49678 struct expand_vec_perm_d d;
49679 bool ok, full_interleave;
49680 bool uns_p = false;
49681 int i;
49683 switch (qimode)
49685 case V16QImode:
49686 himode = V8HImode;
49687 gen_il = gen_vec_interleave_lowv16qi;
49688 gen_ih = gen_vec_interleave_highv16qi;
49689 break;
49690 case V32QImode:
49691 himode = V16HImode;
49692 gen_il = gen_avx2_interleave_lowv32qi;
49693 gen_ih = gen_avx2_interleave_highv32qi;
49694 break;
49695 case V64QImode:
49696 himode = V32HImode;
49697 gen_il = gen_avx512bw_interleave_lowv64qi;
49698 gen_ih = gen_avx512bw_interleave_highv64qi;
49699 break;
49700 default:
49701 gcc_unreachable ();
49704 op2_l = op2_h = op2;
49705 switch (code)
49707 case MULT:
49708 /* Unpack data such that we've got a source byte in each low byte of
49709 each word. We don't care what goes into the high byte of each word.
49710 Rather than trying to get zero in there, most convenient is to let
49711 it be a copy of the low byte. */
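/* Whatever the high bytes hold cannot change the low byte of each 16-bit
   product: (a + 256*x) * (b + 256*y) == a*b (mod 256), and only the low
   bytes of the products are picked out when the halves are merged back
   together below.  */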
49712 op2_l = gen_reg_rtx (qimode);
49713 op2_h = gen_reg_rtx (qimode);
49714 emit_insn (gen_il (op2_l, op2, op2));
49715 emit_insn (gen_ih (op2_h, op2, op2));
49716 /* FALLTHRU */
49718 op1_l = gen_reg_rtx (qimode);
49719 op1_h = gen_reg_rtx (qimode);
49720 emit_insn (gen_il (op1_l, op1, op1));
49721 emit_insn (gen_ih (op1_h, op1, op1));
49722 full_interleave = qimode == V16QImode;
49723 break;
49725 case ASHIFT:
49726 case LSHIFTRT:
49727 uns_p = true;
49728 /* FALLTHRU */
49729 case ASHIFTRT:
49730 op1_l = gen_reg_rtx (himode);
49731 op1_h = gen_reg_rtx (himode);
49732 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49733 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49734 full_interleave = true;
49735 break;
49736 default:
49737 gcc_unreachable ();
49740 /* Perform the operation. */
49741 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49742 1, OPTAB_DIRECT);
49743 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49744 1, OPTAB_DIRECT);
49745 gcc_assert (res_l && res_h);
49747 /* Merge the data back into the right place. */
49748 d.target = dest;
49749 d.op0 = gen_lowpart (qimode, res_l);
49750 d.op1 = gen_lowpart (qimode, res_h);
49751 d.vmode = qimode;
49752 d.nelt = GET_MODE_NUNITS (qimode);
49753 d.one_operand_p = false;
49754 d.testing_p = false;
49756 if (full_interleave)
49758 /* For SSE2, we used a full interleave, so the desired
49759 results are in the even elements. */
49760 for (i = 0; i < 64; ++i)
49761 d.perm[i] = i * 2;
49763 else
49765 /* For AVX, the interleave used above was not cross-lane. So the
49766 extraction takes the even elements, but with the second and third quarters swapped.
49767 Happily, that is even one insn shorter than even extraction. */
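/* For V32QImode this yields the selector { 0, 2, ..., 14, 32, 34, ..., 46,
   16, 18, ..., 30, 48, 50, ..., 62 }: the even bytes of the low lanes of
   res_l and res_h, followed by the even bytes of their high lanes.  */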
49768 for (i = 0; i < 64; ++i)
49769 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49772 ok = ix86_expand_vec_perm_const_1 (&d);
49773 gcc_assert (ok);
49775 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49776 gen_rtx_fmt_ee (code, qimode, op1, op2));
49779 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49780 if op is CONST_VECTOR with all odd elements equal to their
49781 preceding element. */
49783 static bool
49784 const_vector_equal_evenodd_p (rtx op)
49786 machine_mode mode = GET_MODE (op);
49787 int i, nunits = GET_MODE_NUNITS (mode);
49788 if (GET_CODE (op) != CONST_VECTOR
49789 || nunits != CONST_VECTOR_NUNITS (op))
49790 return false;
49791 for (i = 0; i < nunits; i += 2)
49792 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
49793 return false;
49794 return true;
49797 void
49798 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49799 bool uns_p, bool odd_p)
49801 machine_mode mode = GET_MODE (op1);
49802 machine_mode wmode = GET_MODE (dest);
49803 rtx x;
49804 rtx orig_op1 = op1, orig_op2 = op2;
49806 if (!nonimmediate_operand (op1, mode))
49807 op1 = force_reg (mode, op1);
49808 if (!nonimmediate_operand (op2, mode))
49809 op2 = force_reg (mode, op2);
49811 /* We only play even/odd games with vectors of SImode. */
49812 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49814 /* If we're looking for the odd results, shift those members down to
49815 the even slots. For some cpus this is faster than a PSHUFD. */
49816 if (odd_p)
49818 /* For XOP use vpmacsdqh, but only for smult, as it is only
49819 signed. */
49820 if (TARGET_XOP && mode == V4SImode && !uns_p)
49822 x = force_reg (wmode, CONST0_RTX (wmode));
49823 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
49824 return;
49827 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49828 if (!const_vector_equal_evenodd_p (orig_op1))
49829 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49830 x, NULL, 1, OPTAB_DIRECT);
49831 if (!const_vector_equal_evenodd_p (orig_op2))
49832 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49833 x, NULL, 1, OPTAB_DIRECT);
49834 op1 = gen_lowpart (mode, op1);
49835 op2 = gen_lowpart (mode, op2);
49838 if (mode == V16SImode)
49840 if (uns_p)
49841 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49842 else
49843 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49845 else if (mode == V8SImode)
49847 if (uns_p)
49848 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49849 else
49850 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49852 else if (uns_p)
49853 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49854 else if (TARGET_SSE4_1)
49855 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49856 else
49858 rtx s1, s2, t0, t1, t2;
49860 /* The easiest way to implement this without PMULDQ is to go through
49861 the motions as if we are performing a full 64-bit multiply. With
49862 the exception that we need to do less shuffling of the elements. */
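/* The identity used here: with A and B the elements of op1 and op2
   read as unsigned 32-bit values, the signed 64-bit product equals
   A*B - (op1 < 0 ? B << 32 : 0) - (op2 < 0 ? A << 32 : 0) modulo 2^64.
   The all-ones compare masks s1/s2 below supply exactly those correction
   terms, since (0xffffffff * B) << 32 == -(B << 32) modulo 2^64.  */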
49864 /* Compute the sign-extension, aka highparts, of the two operands. */
49865 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49866 op1, pc_rtx, pc_rtx);
49867 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49868 op2, pc_rtx, pc_rtx);
49870 /* Multiply LO(A) * HI(B), and vice-versa. */
49871 t1 = gen_reg_rtx (wmode);
49872 t2 = gen_reg_rtx (wmode);
49873 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49874 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49876 /* Multiply LO(A) * LO(B). */
49877 t0 = gen_reg_rtx (wmode);
49878 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49880 /* Combine and shift the highparts into place. */
49881 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49882 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49883 1, OPTAB_DIRECT);
49885 /* Combine high and low parts. */
49886 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
49887 return;
49889 emit_insn (x);
49892 void
49893 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49894 bool uns_p, bool high_p)
49896 machine_mode wmode = GET_MODE (dest);
49897 machine_mode mode = GET_MODE (op1);
49898 rtx t1, t2, t3, t4, mask;
49900 switch (mode)
49902 case V4SImode:
49903 t1 = gen_reg_rtx (mode);
49904 t2 = gen_reg_rtx (mode);
49905 if (TARGET_XOP && !uns_p)
49907 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49908 shuffle the elements once so that all elements are in the right
49909 place for immediate use: { A C B D }. */
49910 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49911 const1_rtx, GEN_INT (3)));
49912 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49913 const1_rtx, GEN_INT (3)));
49915 else
49917 /* Put the elements into place for the multiply. */
49918 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49919 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49920 high_p = false;
49922 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49923 break;
49925 case V8SImode:
49926 /* Shuffle the elements between the lanes. After this we
49927 have { A B E F | C D G H } for each operand. */
49928 t1 = gen_reg_rtx (V4DImode);
49929 t2 = gen_reg_rtx (V4DImode);
49930 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49931 const0_rtx, const2_rtx,
49932 const1_rtx, GEN_INT (3)));
49933 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49934 const0_rtx, const2_rtx,
49935 const1_rtx, GEN_INT (3)));
49937 /* Shuffle the elements within the lanes. After this we
49938 have { A A B B | C C D D } or { E E F F | G G H H }. */
49939 t3 = gen_reg_rtx (V8SImode);
49940 t4 = gen_reg_rtx (V8SImode);
49941 mask = GEN_INT (high_p
49942 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49943 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49944 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49945 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49947 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
49948 break;
49950 case V8HImode:
49951 case V16HImode:
49952 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49953 uns_p, OPTAB_DIRECT);
49954 t2 = expand_binop (mode,
49955 uns_p ? umul_highpart_optab : smul_highpart_optab,
49956 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49957 gcc_assert (t1 && t2);
49959 t3 = gen_reg_rtx (mode);
49960 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49961 emit_move_insn (dest, gen_lowpart (wmode, t3));
49962 break;
49964 case V16QImode:
49965 case V32QImode:
49966 case V32HImode:
49967 case V16SImode:
49968 case V64QImode:
49969 t1 = gen_reg_rtx (wmode);
49970 t2 = gen_reg_rtx (wmode);
49971 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49972 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49974 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49975 break;
49977 default:
49978 gcc_unreachable ();
49982 void
49983 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49985 rtx res_1, res_2, res_3, res_4;
49987 res_1 = gen_reg_rtx (V4SImode);
49988 res_2 = gen_reg_rtx (V4SImode);
49989 res_3 = gen_reg_rtx (V2DImode);
49990 res_4 = gen_reg_rtx (V2DImode);
49991 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49992 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49994 /* Move the results in element 2 down to element 1; we don't care
49995 what goes in elements 2 and 3. Then we can merge the parts
49996 back together with an interleave.
49998 Note that two other sequences were tried:
49999 (1) Use interleaves at the start instead of psrldq, which allows
50000 us to use a single shufps to merge things back at the end.
50001 (2) Use shufps here to combine the two vectors, then pshufd to
50002 put the elements in the correct order.
50003 In both cases the cost of the reformatting stall was too high
50004 and the overall sequence slower. */
50006 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50007 const0_rtx, const2_rtx,
50008 const0_rtx, const0_rtx));
50009 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50010 const0_rtx, const2_rtx,
50011 const0_rtx, const0_rtx));
50012 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50014 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50017 void
50018 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50020 machine_mode mode = GET_MODE (op0);
50021 rtx t1, t2, t3, t4, t5, t6;
50023 if (TARGET_AVX512DQ && mode == V8DImode)
50024 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50025 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50026 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50027 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50028 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50029 else if (TARGET_XOP && mode == V2DImode)
50031 /* op1: A,B,C,D, op2: E,F,G,H */
50032 op1 = gen_lowpart (V4SImode, op1);
50033 op2 = gen_lowpart (V4SImode, op2);
50035 t1 = gen_reg_rtx (V4SImode);
50036 t2 = gen_reg_rtx (V4SImode);
50037 t3 = gen_reg_rtx (V2DImode);
50038 t4 = gen_reg_rtx (V2DImode);
50040 /* t1: B,A,D,C */
50041 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50042 GEN_INT (1),
50043 GEN_INT (0),
50044 GEN_INT (3),
50045 GEN_INT (2)));
50047 /* t2: (B*E),(A*F),(D*G),(C*H) */
50048 emit_insn (gen_mulv4si3 (t2, t1, op2));
50050 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50051 emit_insn (gen_xop_phadddq (t3, t2));
50053 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50054 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50056 /* Multiply lower parts and add all */
50057 t5 = gen_reg_rtx (V2DImode);
50058 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50059 gen_lowpart (V4SImode, op1),
50060 gen_lowpart (V4SImode, op2)));
50061 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50064 else
50066 machine_mode nmode;
50067 rtx (*umul) (rtx, rtx, rtx);
50069 if (mode == V2DImode)
50071 umul = gen_vec_widen_umult_even_v4si;
50072 nmode = V4SImode;
50074 else if (mode == V4DImode)
50076 umul = gen_vec_widen_umult_even_v8si;
50077 nmode = V8SImode;
50079 else if (mode == V8DImode)
50081 umul = gen_vec_widen_umult_even_v16si;
50082 nmode = V16SImode;
50084 else
50085 gcc_unreachable ();
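/* Decompose each 64-bit element into 32-bit halves, op1 = h1:l1 and
   op2 = h2:l2; the low 64 bits of the product are then
   l1*l2 + ((h1*l2 + h2*l1) << 32), computed below with three widening
   unsigned 32x32->64 multiplies.  */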
50088 /* Multiply low parts. */
50089 t1 = gen_reg_rtx (mode);
50090 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50092 /* Shift input vectors right 32 bits so we can multiply high parts. */
50093 t6 = GEN_INT (32);
50094 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50095 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50097 /* Multiply high parts by low parts. */
50098 t4 = gen_reg_rtx (mode);
50099 t5 = gen_reg_rtx (mode);
50100 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50101 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50103 /* Combine and shift the highparts back. */
50104 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50105 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50107 /* Combine high and low parts. */
50108 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50111 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50112 gen_rtx_MULT (mode, op1, op2));
50115 /* Return 1 if control transfer instruction INSN
50116 should be encoded with bnd prefix.
50117 If insn is NULL then return 1 when control
50118 transfer instructions should be prefixed with
50119 bnd by default for current function. */
50121 bool
50122 ix86_bnd_prefixed_insn_p (rtx insn)
50124 /* For call insns check special flag. */
50125 if (insn && CALL_P (insn))
50127 rtx call = get_call_rtx_from (insn);
50128 if (call)
50129 return CALL_EXPR_WITH_BOUNDS_P (call);
50132 /* All other insns are prefixed only if function is instrumented. */
50133 return chkp_function_instrumented_p (current_function_decl);
50136 /* Calculate integer abs() using only SSE2 instructions. */
50138 void
50139 ix86_expand_sse2_abs (rtx target, rtx input)
50141 machine_mode mode = GET_MODE (target);
50142 rtx tmp0, tmp1, x;
50144 switch (mode)
50146 /* For 32-bit signed integer X, the best way to calculate the absolute
50147 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
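/* For example, with X = -5 and W = 32: T = X >> 31 = -1, (T ^ X) = 4
   and 4 - (-1) = 5; for X >= 0 the shift yields 0 and the expression
   reduces to X.  */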
50148 case V4SImode:
50149 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50150 GEN_INT (GET_MODE_BITSIZE
50151 (GET_MODE_INNER (mode)) - 1),
50152 NULL, 0, OPTAB_DIRECT);
50153 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50154 NULL, 0, OPTAB_DIRECT);
50155 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50156 target, 0, OPTAB_DIRECT);
50157 break;
50159 /* For 16-bit signed integer X, the best way to calculate the absolute
50160 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50161 case V8HImode:
50162 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50164 x = expand_simple_binop (mode, SMAX, tmp0, input,
50165 target, 0, OPTAB_DIRECT);
50166 break;
50168 /* For 8-bit signed integer X, the best way to calculate the absolute
50169 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50170 as SSE2 provides the PMINUB insn. */
50171 case V16QImode:
50172 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50174 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50175 target, 0, OPTAB_DIRECT);
50176 break;
50178 default:
50179 gcc_unreachable ();
50182 if (x != target)
50183 emit_move_insn (target, x);
50186 /* Expand an insert into a vector register through pinsr insn.
50187 Return true if successful. */
50189 bool
50190 ix86_expand_pinsr (rtx *operands)
50192 rtx dst = operands[0];
50193 rtx src = operands[3];
50195 unsigned int size = INTVAL (operands[1]);
50196 unsigned int pos = INTVAL (operands[2]);
50198 if (GET_CODE (dst) == SUBREG)
50200 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50201 dst = SUBREG_REG (dst);
50204 if (GET_CODE (src) == SUBREG)
50205 src = SUBREG_REG (src);
50207 switch (GET_MODE (dst))
50209 case V16QImode:
50210 case V8HImode:
50211 case V4SImode:
50212 case V2DImode:
50214 machine_mode srcmode, dstmode;
50215 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50217 srcmode = mode_for_size (size, MODE_INT, 0);
50219 switch (srcmode)
50221 case QImode:
50222 if (!TARGET_SSE4_1)
50223 return false;
50224 dstmode = V16QImode;
50225 pinsr = gen_sse4_1_pinsrb;
50226 break;
50228 case HImode:
50229 if (!TARGET_SSE2)
50230 return false;
50231 dstmode = V8HImode;
50232 pinsr = gen_sse2_pinsrw;
50233 break;
50235 case SImode:
50236 if (!TARGET_SSE4_1)
50237 return false;
50238 dstmode = V4SImode;
50239 pinsr = gen_sse4_1_pinsrd;
50240 break;
50242 case DImode:
50243 gcc_assert (TARGET_64BIT);
50244 if (!TARGET_SSE4_1)
50245 return false;
50246 dstmode = V2DImode;
50247 pinsr = gen_sse4_1_pinsrq;
50248 break;
50250 default:
50251 return false;
50254 rtx d = dst;
50255 if (GET_MODE (dst) != dstmode)
50256 d = gen_reg_rtx (dstmode);
50257 src = gen_lowpart (srcmode, src);
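/* Convert the bit position into an element index; the pinsr patterns
   take the destination element as a single-bit merge mask, hence the
   (1 << pos) immediate below.  */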
50259 pos /= size;
50261 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50262 GEN_INT (1 << pos)));
50263 if (d != dst)
50264 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50265 return true;
50268 default:
50269 return false;
50273 /* This function returns the calling-ABI-specific va_list type node.
50274 It returns the FNDECL specific va_list type. */
50276 static tree
50277 ix86_fn_abi_va_list (tree fndecl)
50279 if (!TARGET_64BIT)
50280 return va_list_type_node;
50281 gcc_assert (fndecl != NULL_TREE);
50283 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50284 return ms_va_list_type_node;
50285 else
50286 return sysv_va_list_type_node;
50289 /* Returns the canonical va_list type specified by TYPE. If there
50290 is no valid TYPE provided, it returns NULL_TREE. */
50292 static tree
50293 ix86_canonical_va_list_type (tree type)
50295 tree wtype, htype;
50297 /* Resolve references and pointers to va_list type. */
50298 if (TREE_CODE (type) == MEM_REF)
50299 type = TREE_TYPE (type);
50300 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50301 type = TREE_TYPE (type);
50302 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50303 type = TREE_TYPE (type);
50305 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50307 wtype = va_list_type_node;
50308 gcc_assert (wtype != NULL_TREE);
50309 htype = type;
50310 if (TREE_CODE (wtype) == ARRAY_TYPE)
50312 /* If va_list is an array type, the argument may have decayed
50313 to a pointer type, e.g. by being passed to another function.
50314 In that case, unwrap both types so that we can compare the
50315 underlying records. */
50316 if (TREE_CODE (htype) == ARRAY_TYPE
50317 || POINTER_TYPE_P (htype))
50319 wtype = TREE_TYPE (wtype);
50320 htype = TREE_TYPE (htype);
50323 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50324 return va_list_type_node;
50325 wtype = sysv_va_list_type_node;
50326 gcc_assert (wtype != NULL_TREE);
50327 htype = type;
50328 if (TREE_CODE (wtype) == ARRAY_TYPE)
50330 /* If va_list is an array type, the argument may have decayed
50331 to a pointer type, e.g. by being passed to another function.
50332 In that case, unwrap both types so that we can compare the
50333 underlying records. */
50334 if (TREE_CODE (htype) == ARRAY_TYPE
50335 || POINTER_TYPE_P (htype))
50337 wtype = TREE_TYPE (wtype);
50338 htype = TREE_TYPE (htype);
50341 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50342 return sysv_va_list_type_node;
50343 wtype = ms_va_list_type_node;
50344 gcc_assert (wtype != NULL_TREE);
50345 htype = type;
50346 if (TREE_CODE (wtype) == ARRAY_TYPE)
50348 /* If va_list is an array type, the argument may have decayed
50349 to a pointer type, e.g. by being passed to another function.
50350 In that case, unwrap both types so that we can compare the
50351 underlying records. */
50352 if (TREE_CODE (htype) == ARRAY_TYPE
50353 || POINTER_TYPE_P (htype))
50355 wtype = TREE_TYPE (wtype);
50356 htype = TREE_TYPE (htype);
50359 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50360 return ms_va_list_type_node;
50361 return NULL_TREE;
50363 return std_canonical_va_list_type (type);
50366 /* Iterate through the target-specific builtin types for va_list.
50367 IDX denotes the iterator, *PTREE is set to the result type of
50368 the va_list builtin, and *PNAME to its internal type.
50369 Returns zero if there is no element for this index, otherwise
50370 IDX should be increased upon the next call.
50371 Note, do not iterate a base builtin's name like __builtin_va_list.
50372 Used from c_common_nodes_and_builtins. */
50374 static int
50375 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50377 if (TARGET_64BIT)
50379 switch (idx)
50381 default:
50382 break;
50384 case 0:
50385 *ptree = ms_va_list_type_node;
50386 *pname = "__builtin_ms_va_list";
50387 return 1;
50389 case 1:
50390 *ptree = sysv_va_list_type_node;
50391 *pname = "__builtin_sysv_va_list";
50392 return 1;
50396 return 0;
50399 #undef TARGET_SCHED_DISPATCH
50400 #define TARGET_SCHED_DISPATCH has_dispatch
50401 #undef TARGET_SCHED_DISPATCH_DO
50402 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50403 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50404 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50405 #undef TARGET_SCHED_REORDER
50406 #define TARGET_SCHED_REORDER ix86_sched_reorder
50407 #undef TARGET_SCHED_ADJUST_PRIORITY
50408 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50409 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50410 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50411 ix86_dependencies_evaluation_hook
50413 /* The size of the dispatch window is the total number of bytes of
50414 object code allowed in a window. */
50415 #define DISPATCH_WINDOW_SIZE 16
50417 /* Number of dispatch windows considered for scheduling. */
50418 #define MAX_DISPATCH_WINDOWS 3
50420 /* Maximum number of instructions in a window. */
50421 #define MAX_INSN 4
50423 /* Maximum number of immediate operands in a window. */
50424 #define MAX_IMM 4
50426 /* Maximum number of immediate bits allowed in a window. */
50427 #define MAX_IMM_SIZE 128
50429 /* Maximum number of 32 bit immediates allowed in a window. */
50430 #define MAX_IMM_32 4
50432 /* Maximum number of 64 bit immediates allowed in a window. */
50433 #define MAX_IMM_64 2
50435 /* Maximum total of loads or prefetches allowed in a window. */
50436 #define MAX_LOAD 2
50438 /* Maximum total of stores allowed in a window. */
50439 #define MAX_STORE 1
50441 #undef BIG
50442 #define BIG 100
50445 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50446 enum dispatch_group {
50447 disp_no_group = 0,
50448 disp_load,
50449 disp_store,
50450 disp_load_store,
50451 disp_prefetch,
50452 disp_imm,
50453 disp_imm_32,
50454 disp_imm_64,
50455 disp_branch,
50456 disp_cmp,
50457 disp_jcc,
50458 disp_last
50461 /* Number of allowable groups in a dispatch window. It is an array
50462 indexed by dispatch_group enum. 100 is used as a big number,
50463 because the number of these kinds of operations does not have any
50464 effect on the dispatch window, but we need them for other reasons in
50465 the table. */
50466 static unsigned int num_allowable_groups[disp_last] = {
50467 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
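/* In dispatch_group order: at most 2 loads, 1 store, 1 load/store,
   2 prefetches, 4 immediates (at most 4 of them 32-bit and 2 of them
   64-bit) and 1 branch per window; compares and conditional jumps are
   effectively unlimited here.  */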
50470 char group_name[disp_last + 1][16] = {
50471 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50472 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50473 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50476 /* Instruction path. */
50477 enum insn_path {
50478 no_path = 0,
50479 path_single, /* Single micro op. */
50480 path_double, /* Double micro op. */
50481 path_multi, /* Instructions with more than 2 micro ops. */
50482 last_path
50485 /* sched_insn_info defines a window to the instructions scheduled in
50486 the basic block. It contains a pointer to the insn_info table and
50487 the instruction scheduled.
50489 Windows are allocated for each basic block and are linked
50490 together. */
50491 typedef struct sched_insn_info_s {
50492 rtx insn;
50493 enum dispatch_group group;
50494 enum insn_path path;
50495 int byte_len;
50496 int imm_bytes;
50497 } sched_insn_info;
50499 /* Linked list of dispatch windows. This is a two way list of
50500 dispatch windows of a basic block. It contains information about
50501 the number of uops in the window and the total number of
50502 instructions and of bytes in the object code for this dispatch
50503 window. */
50504 typedef struct dispatch_windows_s {
50505 int num_insn; /* Number of insn in the window. */
50506 int num_uops; /* Number of uops in the window. */
50507 int window_size; /* Number of bytes in the window. */
50508 int window_num; /* Window number, either 0 or 1. */
50509 int num_imm; /* Number of immediates in the window. */
50510 int num_imm_32; /* Number of 32 bit immediates in the window. */
50511 int num_imm_64; /* Number of 64 bit immediates in the window. */
50512 int imm_size; /* Total size in bytes of immediates in the window. */
50513 int num_loads; /* Total memory loads in the window. */
50514 int num_stores; /* Total memory stores in the window. */
50515 int violation; /* Violation exists in window. */
50516 sched_insn_info *window; /* Pointer to the window. */
50517 struct dispatch_windows_s *next;
50518 struct dispatch_windows_s *prev;
50519 } dispatch_windows;
50521 /* Immediate values used in an insn. */
50522 typedef struct imm_info_s
50524 int imm;
50525 int imm32;
50526 int imm64;
50527 } imm_info;
50529 static dispatch_windows *dispatch_window_list;
50530 static dispatch_windows *dispatch_window_list1;
50532 /* Get dispatch group of insn. */
50534 static enum dispatch_group
50535 get_mem_group (rtx_insn *insn)
50537 enum attr_memory memory;
50539 if (INSN_CODE (insn) < 0)
50540 return disp_no_group;
50541 memory = get_attr_memory (insn);
50542 if (memory == MEMORY_STORE)
50543 return disp_store;
50545 if (memory == MEMORY_LOAD)
50546 return disp_load;
50548 if (memory == MEMORY_BOTH)
50549 return disp_load_store;
50551 return disp_no_group;
50554 /* Return true if insn is a compare instruction. */
50556 static bool
50557 is_cmp (rtx_insn *insn)
50559 enum attr_type type;
50561 type = get_attr_type (insn);
50562 return (type == TYPE_TEST
50563 || type == TYPE_ICMP
50564 || type == TYPE_FCMP
50565 || GET_CODE (PATTERN (insn)) == COMPARE);
50568 /* Return true if a dispatch violation was encountered. */
50570 static bool
50571 dispatch_violation (void)
50573 if (dispatch_window_list->next)
50574 return dispatch_window_list->next->violation;
50575 return dispatch_window_list->violation;
50578 /* Return true if insn is a branch instruction. */
50580 static bool
50581 is_branch (rtx insn)
50583 return (CALL_P (insn) || JUMP_P (insn));
50586 /* Return true if insn is a prefetch instruction. */
50588 static bool
50589 is_prefetch (rtx insn)
50591 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50594 /* This function initializes a dispatch window and the list container holding a
50595 pointer to the window. */
50597 static void
50598 init_window (int window_num)
50600 int i;
50601 dispatch_windows *new_list;
50603 if (window_num == 0)
50604 new_list = dispatch_window_list;
50605 else
50606 new_list = dispatch_window_list1;
50608 new_list->num_insn = 0;
50609 new_list->num_uops = 0;
50610 new_list->window_size = 0;
50611 new_list->next = NULL;
50612 new_list->prev = NULL;
50613 new_list->window_num = window_num;
50614 new_list->num_imm = 0;
50615 new_list->num_imm_32 = 0;
50616 new_list->num_imm_64 = 0;
50617 new_list->imm_size = 0;
50618 new_list->num_loads = 0;
50619 new_list->num_stores = 0;
50620 new_list->violation = false;
50622 for (i = 0; i < MAX_INSN; i++)
50624 new_list->window[i].insn = NULL;
50625 new_list->window[i].group = disp_no_group;
50626 new_list->window[i].path = no_path;
50627 new_list->window[i].byte_len = 0;
50628 new_list->window[i].imm_bytes = 0;
50630 return;
50633 /* This function allocates and initializes a dispatch window and the
50634 list container holding a pointer to the window. */
50636 static dispatch_windows *
50637 allocate_window (void)
50639 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50640 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50642 return new_list;
50645 /* This routine initializes the dispatch scheduling information. It
50646 initiates building dispatch scheduler tables and constructs the
50647 first dispatch window. */
50649 static void
50650 init_dispatch_sched (void)
50652 /* Allocate a dispatch list and a window. */
50653 dispatch_window_list = allocate_window ();
50654 dispatch_window_list1 = allocate_window ();
50655 init_window (0);
50656 init_window (1);
50659 /* This function returns true if a branch is detected. End of a basic block
50660 does not have to be a branch, but here we assume only branches end a
50661 window. */
50663 static bool
50664 is_end_basic_block (enum dispatch_group group)
50666 return group == disp_branch;
50669 /* This function is called when the end of a window processing is reached. */
50671 static void
50672 process_end_window (void)
50674 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50675 if (dispatch_window_list->next)
50677 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50678 gcc_assert (dispatch_window_list->window_size
50679 + dispatch_window_list1->window_size <= 48);
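/* The 48 above equals MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE, the
   total bytes of object code allowed across the two window lists.  */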
50680 init_window (1);
50682 init_window (0);
50685 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50686 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50687 for 48 bytes of instructions. Note that these are not dispatch
50688 windows whose sizes are DISPATCH_WINDOW_SIZE. */
50690 static dispatch_windows *
50691 allocate_next_window (int window_num)
50693 if (window_num == 0)
50695 if (dispatch_window_list->next)
50696 init_window (1);
50697 init_window (0);
50698 return dispatch_window_list;
50701 dispatch_window_list->next = dispatch_window_list1;
50702 dispatch_window_list1->prev = dispatch_window_list;
50704 return dispatch_window_list1;
50707 /* Compute number of immediate operands of an instruction. */
50709 static void
50710 find_constant (rtx in_rtx, imm_info *imm_values)
50712 if (INSN_P (in_rtx))
50713 in_rtx = PATTERN (in_rtx);
50714 subrtx_iterator::array_type array;
50715 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50716 if (const_rtx x = *iter)
50717 switch (GET_CODE (x))
50719 case CONST:
50720 case SYMBOL_REF:
50721 case CONST_INT:
50722 (imm_values->imm)++;
50723 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50724 (imm_values->imm32)++;
50725 else
50726 (imm_values->imm64)++;
50727 break;
50729 case CONST_DOUBLE:
50730 (imm_values->imm)++;
50731 (imm_values->imm64)++;
50732 break;
50734 case CODE_LABEL:
50735 if (LABEL_KIND (x) == LABEL_NORMAL)
50737 (imm_values->imm)++;
50738 (imm_values->imm32)++;
50740 break;
50742 default:
50743 break;
50747 /* Return total size of immediate operands of an instruction along with the
50748 number of corresponding immediate operands. It initializes its parameters
50749 to zero before calling FIND_CONSTANT.
50750 INSN is the input instruction. IMM is the total of immediates.
50751 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50752 bit immediates. */
50754 static int
50755 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50757 imm_info imm_values = {0, 0, 0};
50759 find_constant (insn, &imm_values);
50760 *imm = imm_values.imm;
50761 *imm32 = imm_values.imm32;
50762 *imm64 = imm_values.imm64;
50763 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50766 /* This function indicates if an operand of an instruction is an
50767 immediate. */
50769 static bool
50770 has_immediate (rtx insn)
50772 int num_imm_operand;
50773 int num_imm32_operand;
50774 int num_imm64_operand;
50776 if (insn)
50777 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50778 &num_imm64_operand);
50779 return false;
50782 /* Return single or double path for instructions. */
50784 static enum insn_path
50785 get_insn_path (rtx_insn *insn)
50787 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50789 if ((int)path == 0)
50790 return path_single;
50792 if ((int)path == 1)
50793 return path_double;
50795 return path_multi;
50798 /* Return insn dispatch group. */
50800 static enum dispatch_group
50801 get_insn_group (rtx_insn *insn)
50803 enum dispatch_group group = get_mem_group (insn);
50804 if (group)
50805 return group;
50807 if (is_branch (insn))
50808 return disp_branch;
50810 if (is_cmp (insn))
50811 return disp_cmp;
50813 if (has_immediate (insn))
50814 return disp_imm;
50816 if (is_prefetch (insn))
50817 return disp_prefetch;
50819 return disp_no_group;
50822 /* Count number of GROUP restricted instructions in a dispatch
50823 window WINDOW_LIST. */
50825 static int
50826 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50828 enum dispatch_group group = get_insn_group (insn);
50829 int imm_size;
50830 int num_imm_operand;
50831 int num_imm32_operand;
50832 int num_imm64_operand;
50834 if (group == disp_no_group)
50835 return 0;
50837 if (group == disp_imm)
50839 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50840 &num_imm64_operand);
50841 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50842 || num_imm_operand + window_list->num_imm > MAX_IMM
50843 || (num_imm32_operand > 0
50844 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50845 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50846 || (num_imm64_operand > 0
50847 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50848 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50849 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50850 && num_imm64_operand > 0
50851 && ((window_list->num_imm_64 > 0
50852 && window_list->num_insn >= 2)
50853 || window_list->num_insn >= 3)))
50854 return BIG;
50856 return 1;
50859 if ((group == disp_load_store
50860 && (window_list->num_loads >= MAX_LOAD
50861 || window_list->num_stores >= MAX_STORE))
50862 || ((group == disp_load
50863 || group == disp_prefetch)
50864 && window_list->num_loads >= MAX_LOAD)
50865 || (group == disp_store
50866 && window_list->num_stores >= MAX_STORE))
50867 return BIG;
50869 return 1;
50872 /* This function returns true if insn satisfies dispatch rules on the
50873 last window scheduled. */
50875 static bool
50876 fits_dispatch_window (rtx_insn *insn)
50878 dispatch_windows *window_list = dispatch_window_list;
50879 dispatch_windows *window_list_next = dispatch_window_list->next;
50880 unsigned int num_restrict;
50881 enum dispatch_group group = get_insn_group (insn);
50882 enum insn_path path = get_insn_path (insn);
50883 int sum;
50885 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50886 instructions should be given the lowest priority in the
50887 scheduling process in Haifa scheduler to make sure they will be
50888 scheduled in the same dispatch window as the reference to them. */
50889 if (group == disp_jcc || group == disp_cmp)
50890 return false;
50892 /* Check nonrestricted. */
50893 if (group == disp_no_group || group == disp_branch)
50894 return true;
50896 /* Get last dispatch window. */
50897 if (window_list_next)
50898 window_list = window_list_next;
50900 if (window_list->window_num == 1)
50902 sum = window_list->prev->window_size + window_list->window_size;
50904 if (sum == 32
50905 || (min_insn_size (insn) + sum) >= 48)
50906 /* Window 1 is full. Go for next window. */
50907 return true;
50910 num_restrict = count_num_restricted (insn, window_list);
50912 if (num_restrict > num_allowable_groups[group])
50913 return false;
50915 /* See if it fits in the first window. */
50916 if (window_list->window_num == 0)
50918 /* The first window should have only single and double path
50919 uops. */
50920 if (path == path_double
50921 && (window_list->num_uops + 2) > MAX_INSN)
50922 return false;
50923 else if (path != path_single)
50924 return false;
50926 return true;
50929 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50930 dispatch window WINDOW_LIST. */
50932 static void
50933 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50935 int byte_len = min_insn_size (insn);
50936 int num_insn = window_list->num_insn;
50937 int imm_size;
50938 sched_insn_info *window = window_list->window;
50939 enum dispatch_group group = get_insn_group (insn);
50940 enum insn_path path = get_insn_path (insn);
50941 int num_imm_operand;
50942 int num_imm32_operand;
50943 int num_imm64_operand;
50945 if (!window_list->violation && group != disp_cmp
50946 && !fits_dispatch_window (insn))
50947 window_list->violation = true;
50949 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50950 &num_imm64_operand);
50952 /* Initialize window with new instruction. */
50953 window[num_insn].insn = insn;
50954 window[num_insn].byte_len = byte_len;
50955 window[num_insn].group = group;
50956 window[num_insn].path = path;
50957 window[num_insn].imm_bytes = imm_size;
50959 window_list->window_size += byte_len;
50960 window_list->num_insn = num_insn + 1;
50961 window_list->num_uops = window_list->num_uops + num_uops;
50962 window_list->imm_size += imm_size;
50963 window_list->num_imm += num_imm_operand;
50964 window_list->num_imm_32 += num_imm32_operand;
50965 window_list->num_imm_64 += num_imm64_operand;
50967 if (group == disp_store)
50968 window_list->num_stores += 1;
50969 else if (group == disp_load
50970 || group == disp_prefetch)
50971 window_list->num_loads += 1;
50972 else if (group == disp_load_store)
50974 window_list->num_stores += 1;
50975 window_list->num_loads += 1;
50979 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50980 If the total bytes of instructions or the number of instructions in
50981 the window exceed the allowable limits, it allocates a new window.
50983 static void
50984 add_to_dispatch_window (rtx_insn *insn)
50986 int byte_len;
50987 dispatch_windows *window_list;
50988 dispatch_windows *next_list;
50989 dispatch_windows *window0_list;
50990 enum insn_path path;
50991 enum dispatch_group insn_group;
50992 bool insn_fits;
50993 int num_insn;
50994 int num_uops;
50995 int window_num;
50996 int insn_num_uops;
50997 int sum;
50999 if (INSN_CODE (insn) < 0)
51000 return;
51002 byte_len = min_insn_size (insn);
51003 window_list = dispatch_window_list;
51004 next_list = window_list->next;
51005 path = get_insn_path (insn);
51006 insn_group = get_insn_group (insn);
51008 /* Get the last dispatch window. */
51009 if (next_list)
51010 window_list = dispatch_window_list->next;
51012 if (path == path_single)
51013 insn_num_uops = 1;
51014 else if (path == path_double)
51015 insn_num_uops = 2;
51016 else
51017 insn_num_uops = (int) path;
51019 /* If current window is full, get a new window.
51020 Window number zero is full, if MAX_INSN uops are scheduled in it.
51021 Window number one is full, if window zero's bytes plus window
51022 one's bytes is 32, or if the bytes of the new instruction added
51023 to the total make it greater than 48, or if it already has MAX_INSN
51024 instructions in it. */
51025 num_insn = window_list->num_insn;
51026 num_uops = window_list->num_uops;
51027 window_num = window_list->window_num;
51028 insn_fits = fits_dispatch_window (insn);
51030 if (num_insn >= MAX_INSN
51031 || num_uops + insn_num_uops > MAX_INSN
51032 || !(insn_fits))
51034 window_num = ~window_num & 1;
51035 window_list = allocate_next_window (window_num);
51038 if (window_num == 0)
51040 add_insn_window (insn, window_list, insn_num_uops);
51041 if (window_list->num_insn >= MAX_INSN
51042 && insn_group == disp_branch)
51044 process_end_window ();
51045 return;
51048 else if (window_num == 1)
51050 window0_list = window_list->prev;
51051 sum = window0_list->window_size + window_list->window_size;
51052 if (sum == 32
51053 || (byte_len + sum) >= 48)
51055 process_end_window ();
51056 window_list = dispatch_window_list;
51059 add_insn_window (insn, window_list, insn_num_uops);
51061 else
51062 gcc_unreachable ();
51064 if (is_end_basic_block (insn_group))
51066 /* End of basic block is reached; do end-of-basic-block processing. */
51067 process_end_window ();
51068 return;
51072 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51074 DEBUG_FUNCTION static void
51075 debug_dispatch_window_file (FILE *file, int window_num)
51077 dispatch_windows *list;
51078 int i;
51080 if (window_num == 0)
51081 list = dispatch_window_list;
51082 else
51083 list = dispatch_window_list1;
51085 fprintf (file, "Window #%d:\n", list->window_num);
51086 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51087 list->num_insn, list->num_uops, list->window_size);
51088 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51089 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51091 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51092 list->num_stores);
51093 fprintf (file, " insn info:\n");
51095 for (i = 0; i < MAX_INSN; i++)
51097 if (!list->window[i].insn)
51098 break;
51099 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51100 i, group_name[list->window[i].group],
51101 i, (void *)list->window[i].insn,
51102 i, list->window[i].path,
51103 i, list->window[i].byte_len,
51104 i, list->window[i].imm_bytes);
51108 /* Print to stdout a dispatch window. */
51110 DEBUG_FUNCTION void
51111 debug_dispatch_window (int window_num)
51113 debug_dispatch_window_file (stdout, window_num);
51116 /* Print INSN dispatch information to FILE. */
51118 DEBUG_FUNCTION static void
51119 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51121 int byte_len;
51122 enum insn_path path;
51123 enum dispatch_group group;
51124 int imm_size;
51125 int num_imm_operand;
51126 int num_imm32_operand;
51127 int num_imm64_operand;
51129 if (INSN_CODE (insn) < 0)
51130 return;
51132 byte_len = min_insn_size (insn);
51133 path = get_insn_path (insn);
51134 group = get_insn_group (insn);
51135 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51136 &num_imm64_operand);
51138 fprintf (file, " insn info:\n");
51139 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51140 group_name[group], path, byte_len);
51141 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51142 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51145 /* Print to STDERR the status of the ready list with respect to
51146 dispatch windows. */
51148 DEBUG_FUNCTION void
51149 debug_ready_dispatch (void)
51151 int i;
51152 int no_ready = number_in_ready ();
51154 fprintf (stdout, "Number of ready: %d\n", no_ready);
51156 for (i = 0; i < no_ready; i++)
51157 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51160 /* This routine is the driver of the dispatch scheduler. */
51162 static void
51163 do_dispatch (rtx_insn *insn, int mode)
51165 if (mode == DISPATCH_INIT)
51166 init_dispatch_sched ();
51167 else if (mode == ADD_TO_DISPATCH_WINDOW)
51168 add_to_dispatch_window (insn);
51171 /* Return TRUE if Dispatch Scheduling is supported. */
51173 static bool
51174 has_dispatch (rtx_insn *insn, int action)
51176 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51177 && flag_dispatch_scheduler)
51178 switch (action)
51180 default:
51181 return false;
51183 case IS_DISPATCH_ON:
51184 return true;
51185 break;
51187 case IS_CMP:
51188 return is_cmp (insn);
51190 case DISPATCH_VIOLATION:
51191 return dispatch_violation ();
51193 case FITS_DISPATCH_WINDOW:
51194 return fits_dispatch_window (insn);
51197 return false;
51200 /* Implementation of reassociation_width target hook used by
51201 reassoc phase to identify parallelism level in reassociated
51202 tree. The statement's tree_code is passed in OPC. The arguments'
51203 type is passed in MODE.
51205 Currently parallel reassociation is enabled for Atom
51206 processors only and we set reassociation width to be 2
51207 because Atom may issue up to 2 instructions per cycle.
51209 Return value should be fixed if parallel reassociation is
51210 enabled for other processors. */
51212 static int
51213 ix86_reassociation_width (unsigned int, machine_mode mode)
51215 /* Vector part. */
51216 if (VECTOR_MODE_P (mode))
51218 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51219 return 2;
51220 else
51221 return 1;
51224 /* Scalar part. */
51225 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51226 return 2;
51227 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51228 return 2;
51229 else
51230 return 1;
51233 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51234 place emms and femms instructions. */
51236 static machine_mode
51237 ix86_preferred_simd_mode (machine_mode mode)
51239 if (!TARGET_SSE)
51240 return word_mode;
51242 switch (mode)
51244 case QImode:
51245 return TARGET_AVX512BW ? V64QImode :
51246 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51247 case HImode:
51248 return TARGET_AVX512BW ? V32HImode :
51249 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51250 case SImode:
51251 return TARGET_AVX512F ? V16SImode :
51252 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51253 case DImode:
51254 return TARGET_AVX512F ? V8DImode :
51255 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51257 case SFmode:
51258 if (TARGET_AVX512F)
51259 return V16SFmode;
51260 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51261 return V8SFmode;
51262 else
51263 return V4SFmode;
51265 case DFmode:
51266 if (!TARGET_VECTORIZE_DOUBLE)
51267 return word_mode;
51268 else if (TARGET_AVX512F)
51269 return V8DFmode;
51270 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51271 return V4DFmode;
51272 else if (TARGET_SSE2)
51273 return V2DFmode;
51274 /* FALLTHRU */
51276 default:
51277 return word_mode;
51281 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51282 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51283 256bit and 128bit vectors. */
51285 static unsigned int
51286 ix86_autovectorize_vector_sizes (void)
51288 return TARGET_AVX512F ? 64 | 32 | 16 :
51289 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51294 /* Return class of registers which could be used for pseudo of MODE
51295 and of class RCLASS for spilling instead of memory. Return NO_REGS
51296 if it is not possible or non-profitable. */
51297 static reg_class_t
51298 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51300 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51301 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51302 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51303 return ALL_SSE_REGS;
51304 return NO_REGS;
51307 /* Implement targetm.vectorize.init_cost. */
51309 static void *
51310 ix86_init_cost (struct loop *)
51312 unsigned *cost = XNEWVEC (unsigned, 3);
51313 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51314 return cost;
51317 /* Implement targetm.vectorize.add_stmt_cost. */
51319 static unsigned
51320 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51321 struct _stmt_vec_info *stmt_info, int misalign,
51322 enum vect_cost_model_location where)
51324 unsigned *cost = (unsigned *) data;
51325 unsigned retval = 0;
51327 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51328 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51330 /* Statements in an inner loop relative to the loop being
51331 vectorized are weighted more heavily. The value here is
51332 arbitrary and could potentially be improved with analysis. */
51333 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51334 count *= 50; /* FIXME. */
51336 retval = (unsigned) (count * stmt_cost);
51338 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51339 for Silvermont as it has an out-of-order integer pipeline and can execute
51340 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51341 if (TARGET_SILVERMONT || TARGET_INTEL)
51342 if (stmt_info && stmt_info->stmt)
51344 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51345 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51346 retval = (retval * 17) / 10;
51349 cost[where] += retval;
51351 return retval;
51354 /* Implement targetm.vectorize.finish_cost. */
51356 static void
51357 ix86_finish_cost (void *data, unsigned *prologue_cost,
51358 unsigned *body_cost, unsigned *epilogue_cost)
51360 unsigned *cost = (unsigned *) data;
51361 *prologue_cost = cost[vect_prologue];
51362 *body_cost = cost[vect_body];
51363 *epilogue_cost = cost[vect_epilogue];
51366 /* Implement targetm.vectorize.destroy_cost_data. */
51368 static void
51369 ix86_destroy_cost_data (void *data)
51371 free (data);
51374 /* Validate target specific memory model bits in VAL. */
51376 static unsigned HOST_WIDE_INT
51377 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51379 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51380 bool strong;
51382 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51383 |MEMMODEL_MASK)
51384 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51386 warning (OPT_Winvalid_memory_model,
51387 "Unknown architecture specific memory model");
51388 return MEMMODEL_SEQ_CST;
51390 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51391 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51393 warning (OPT_Winvalid_memory_model,
51394 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51395 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51397 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51399 warning (OPT_Winvalid_memory_model,
51400 "HLE_RELEASE not used with RELEASE or stronger memory model");
51401 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51403 return val;
51406 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51407 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51408 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51409 or number of vecsize_mangle variants that should be emitted. */
51411 static int
51412 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51413 struct cgraph_simd_clone *clonei,
51414 tree base_type, int num)
51416 int ret = 1;
51418 if (clonei->simdlen
51419 && (clonei->simdlen < 2
51420 || clonei->simdlen > 16
51421 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51423 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51424 "unsupported simdlen %d", clonei->simdlen);
51425 return 0;
51428 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51429 if (TREE_CODE (ret_type) != VOID_TYPE)
51430 switch (TYPE_MODE (ret_type))
51432 case QImode:
51433 case HImode:
51434 case SImode:
51435 case DImode:
51436 case SFmode:
51437 case DFmode:
51438 /* case SCmode: */
51439 /* case DCmode: */
51440 break;
51441 default:
51442 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51443 "unsupported return type %qT for simd\n", ret_type);
51444 return 0;
51447 tree t;
51448 int i;
51450 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51451 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51452 switch (TYPE_MODE (TREE_TYPE (t)))
51454 case QImode:
51455 case HImode:
51456 case SImode:
51457 case DImode:
51458 case SFmode:
51459 case DFmode:
51460 /* case SCmode: */
51461 /* case DCmode: */
51462 break;
51463 default:
51464 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51465 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51466 return 0;
51469 if (clonei->cilk_elemental)
51471 /* Parse the processor clause here. If not present, default to 'b'. */
51472 clonei->vecsize_mangle = 'b';
51474 else if (!TREE_PUBLIC (node->decl))
51476 /* If the function isn't exported, we can pick up just one ISA
51477 for the clones. */
51478 if (TARGET_AVX2)
51479 clonei->vecsize_mangle = 'd';
51480 else if (TARGET_AVX)
51481 clonei->vecsize_mangle = 'c';
51482 else
51483 clonei->vecsize_mangle = 'b';
51484 ret = 1;
51486 else
51488 clonei->vecsize_mangle = "bcd"[num];
51489 ret = 3;
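/* 'b', 'c' and 'd' select the SSE2, AVX and AVX2 clone variants
   respectively; see ix86_simd_clone_adjust below.  */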
51491 switch (clonei->vecsize_mangle)
51493 case 'b':
51494 clonei->vecsize_int = 128;
51495 clonei->vecsize_float = 128;
51496 break;
51497 case 'c':
51498 clonei->vecsize_int = 128;
51499 clonei->vecsize_float = 256;
51500 break;
51501 case 'd':
51502 clonei->vecsize_int = 256;
51503 clonei->vecsize_float = 256;
51504 break;
51506 if (clonei->simdlen == 0)
51508 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51509 clonei->simdlen = clonei->vecsize_int;
51510 else
51511 clonei->simdlen = clonei->vecsize_float;
51512 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51513 if (clonei->simdlen > 16)
51514 clonei->simdlen = 16;
51516 return ret;
51519 /* Add target attribute to SIMD clone NODE if needed. */
51521 static void
51522 ix86_simd_clone_adjust (struct cgraph_node *node)
51524 const char *str = NULL;
51525 gcc_assert (node->decl == cfun->decl);
51526 switch (node->simdclone->vecsize_mangle)
51528 case 'b':
51529 if (!TARGET_SSE2)
51530 str = "sse2";
51531 break;
51532 case 'c':
51533 if (!TARGET_AVX)
51534 str = "avx";
51535 break;
51536 case 'd':
51537 if (!TARGET_AVX2)
51538 str = "avx2";
51539 break;
51540 default:
51541 gcc_unreachable ();
51543 if (str == NULL)
51544 return;
51545 push_cfun (NULL);
51546 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51547 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51548 gcc_assert (ok);
51549 pop_cfun ();
51550 ix86_reset_previous_fndecl ();
51551 ix86_set_current_function (node->decl);
51554 /* If SIMD clone NODE can't be used in a vectorized loop
51555 in current function, return -1, otherwise return a badness of using it
51556 (0 if it is most desirable from vecsize_mangle point of view, 1
51557 slightly less desirable, etc.). */
51559 static int
51560 ix86_simd_clone_usable (struct cgraph_node *node)
51562 switch (node->simdclone->vecsize_mangle)
51564 case 'b':
51565 if (!TARGET_SSE2)
51566 return -1;
51567 if (!TARGET_AVX)
51568 return 0;
51569 return TARGET_AVX2 ? 2 : 1;
51570 case 'c':
51571 if (!TARGET_AVX)
51572 return -1;
51573 return TARGET_AVX2 ? 1 : 0;
51574 break;
51575 case 'd':
51576 if (!TARGET_AVX2)
51577 return -1;
51578 return 0;
51579 default:
51580 gcc_unreachable ();
51584 /* This function adjusts the unroll factor based on
51585 the hardware capabilities. For example, bdver3 has
51586 a loop buffer which makes unrolling of smaller
51587 loops less important. This function decides the
51588 unroll factor using the number of memory references
51589 (value 32 is used) as a heuristic. */
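/* For example, a loop body with 8 counted memory references gets an unroll
   factor of 32/8 = 4; loops with more than 32 references keep the factor
   chosen by the middle end.  */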
51591 static unsigned
51592 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51594 basic_block *bbs;
51595 rtx_insn *insn;
51596 unsigned i;
51597 unsigned mem_count = 0;
51599 if (!TARGET_ADJUST_UNROLL)
51600 return nunroll;
51602 /* Count the number of memory references within the loop body.
51603 This value determines the unrolling factor for bdver3 and bdver4
51604 architectures. */
51605 subrtx_iterator::array_type array;
51606 bbs = get_loop_body (loop);
51607 for (i = 0; i < loop->num_nodes; i++)
51608 FOR_BB_INSNS (bbs[i], insn)
51609 if (NONDEBUG_INSN_P (insn))
51610 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51611 if (const_rtx x = *iter)
51612 if (MEM_P (x))
51614 machine_mode mode = GET_MODE (x);
51615 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51616 if (n_words > 4)
51617 mem_count += 2;
51618 else
51619 mem_count += 1;
51621 free (bbs);
51623 if (mem_count && mem_count <= 32)
51624 return 32 / mem_count;
51626 return nunroll;
51630 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51632 static bool
51633 ix86_float_exceptions_rounding_supported_p (void)
51635 /* For x87 floating point with standard excess precision handling,
51636 there is no adddf3 pattern (since x87 floating point only has
51637 XFmode operations) so the default hook implementation gets this
51638 wrong. */
51639 return TARGET_80387 || TARGET_SSE_MATH;
51642 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51644 static void
51645 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51647 if (!TARGET_80387 && !TARGET_SSE_MATH)
51648 return;
51649 tree exceptions_var = create_tmp_var (integer_type_node);
51650 if (TARGET_80387)
51652 tree fenv_index_type = build_index_type (size_int (6));
51653 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51654 tree fenv_var = create_tmp_var (fenv_type);
51655 mark_addressable (fenv_var);
51656 tree fenv_ptr = build_pointer_type (fenv_type);
51657 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51658 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51659 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51660 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51661 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51662 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51663 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51664 tree hold_fnclex = build_call_expr (fnclex, 0);
51665 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51666 hold_fnclex);
51667 *clear = build_call_expr (fnclex, 0);
51668 tree sw_var = create_tmp_var (short_unsigned_type_node);
51669 tree fnstsw_call = build_call_expr (fnstsw, 0);
51670 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51671 sw_var, fnstsw_call);
51672 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51673 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51674 exceptions_var, exceptions_x87);
51675 *update = build2 (COMPOUND_EXPR, integer_type_node,
51676 sw_mod, update_mod);
51677 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51678 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51680 if (TARGET_SSE_MATH)
51682 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51683 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51684 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51685 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51686 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51687 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51688 mxcsr_orig_var, stmxcsr_hold_call);
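/* Set all exception mask bits (0x1f80, MXCSR bits 7..12) and clear the
   exception flag bits (bits 0..5, hence the 0xffffffc0 mask) in the copy
   of MXCSR that the hold sequence loads.  */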
51689 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51690 mxcsr_orig_var,
51691 build_int_cst (unsigned_type_node, 0x1f80));
51692 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51693 build_int_cst (unsigned_type_node, 0xffffffc0));
51694 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51695 mxcsr_mod_var, hold_mod_val);
51696 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51697 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51698 hold_assign_orig, hold_assign_mod);
51699 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51700 ldmxcsr_hold_call);
51701 if (*hold)
51702 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51703 else
51704 *hold = hold_all;
51705 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51706 if (*clear)
51707 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51708 ldmxcsr_clear_call);
51709 else
51710 *clear = ldmxcsr_clear_call;
51711 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51712 tree exceptions_sse = fold_convert (integer_type_node,
51713 stxmcsr_update_call);
51714 if (*update)
51716 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51717 exceptions_var, exceptions_sse);
51718 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51719 exceptions_var, exceptions_mod);
51720 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51721 exceptions_assign);
51723 else
51724 *update = build2 (MODIFY_EXPR, integer_type_node,
51725 exceptions_var, exceptions_sse);
51726 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51727 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51728 ldmxcsr_update_call);
51730 tree atomic_feraiseexcept
51731 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51732 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51733 1, exceptions_var);
51734 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51735 atomic_feraiseexcept_call);
51738 /* Return mode to be used for bounds or VOIDmode
51739 if bounds are not supported. */
51741 static enum machine_mode
51742 ix86_mpx_bound_mode ()
51744 /* Do not support pointer checker if MPX
51745 is not enabled. */
51746 if (!TARGET_MPX)
51748 if (flag_check_pointer_bounds)
51749 warning (0, "Pointer Checker requires MPX support on this target."
51750 " Use -mmpx options to enable MPX.");
51751 return VOIDmode;
51754 return BNDmode;
51757 /* Return constant used to statically initialize constant bounds.
51759 This function is used to create special bound values. For now
51760 only INIT bounds and NONE bounds are expected. More special
51761 values may be added later. */
51763 static tree
51764 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51766 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51767 : build_zero_cst (pointer_sized_int_node);
51768 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51769 : build_minus_one_cst (pointer_sized_int_node);
51771 /* This function is supposed to be used to create INIT and
51772 NONE bounds only. */
51773 gcc_assert ((lb == 0 && ub == -1)
51774 || (lb == -1 && ub == 0));
51776 return build_complex (NULL, low, high);
51779 /* Generate a list of statements STMTS to initialize pointer bounds
51780 variable VAR with bounds LB and UB. Return the number of generated
51781 statements. */
51783 static int
51784 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51786 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51787 tree lhs, modify, var_p;
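/* The upper bound is stored in memory in one's complement form, hence
   the BIT_NOT applied to UB below.  */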
51789 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51790 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51792 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51793 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51794 append_to_statement_list (modify, stmts);
51796 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51797 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51798 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51799 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51800 append_to_statement_list (modify, stmts);
51802 return 2;
51805 /* Initialize the GCC target structure. */
51806 #undef TARGET_RETURN_IN_MEMORY
51807 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51809 #undef TARGET_LEGITIMIZE_ADDRESS
51810 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51812 #undef TARGET_ATTRIBUTE_TABLE
51813 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51814 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51815 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51816 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51817 # undef TARGET_MERGE_DECL_ATTRIBUTES
51818 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51819 #endif
51821 #undef TARGET_COMP_TYPE_ATTRIBUTES
51822 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51824 #undef TARGET_INIT_BUILTINS
51825 #define TARGET_INIT_BUILTINS ix86_init_builtins
51826 #undef TARGET_BUILTIN_DECL
51827 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51828 #undef TARGET_EXPAND_BUILTIN
51829 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51831 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51832 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51833 ix86_builtin_vectorized_function
51835 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51836 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51838 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51839 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51841 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51842 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51844 #undef TARGET_BUILTIN_RECIPROCAL
51845 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51847 #undef TARGET_ASM_FUNCTION_EPILOGUE
51848 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51850 #undef TARGET_ENCODE_SECTION_INFO
51851 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51852 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51853 #else
51854 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51855 #endif
51857 #undef TARGET_ASM_OPEN_PAREN
51858 #define TARGET_ASM_OPEN_PAREN ""
51859 #undef TARGET_ASM_CLOSE_PAREN
51860 #define TARGET_ASM_CLOSE_PAREN ""
51862 #undef TARGET_ASM_BYTE_OP
51863 #define TARGET_ASM_BYTE_OP ASM_BYTE
51865 #undef TARGET_ASM_ALIGNED_HI_OP
51866 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51867 #undef TARGET_ASM_ALIGNED_SI_OP
51868 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51869 #ifdef ASM_QUAD
51870 #undef TARGET_ASM_ALIGNED_DI_OP
51871 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51872 #endif
51874 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51875 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51877 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51878 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51880 #undef TARGET_ASM_UNALIGNED_HI_OP
51881 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51882 #undef TARGET_ASM_UNALIGNED_SI_OP
51883 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51884 #undef TARGET_ASM_UNALIGNED_DI_OP
51885 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51887 #undef TARGET_PRINT_OPERAND
51888 #define TARGET_PRINT_OPERAND ix86_print_operand
51889 #undef TARGET_PRINT_OPERAND_ADDRESS
51890 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51891 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51892 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51893 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51894 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51896 #undef TARGET_SCHED_INIT_GLOBAL
51897 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51898 #undef TARGET_SCHED_ADJUST_COST
51899 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51900 #undef TARGET_SCHED_ISSUE_RATE
51901 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51902 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51903 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51904 ia32_multipass_dfa_lookahead
51905 #undef TARGET_SCHED_MACRO_FUSION_P
51906 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51907 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51908 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51910 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51911 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51913 #undef TARGET_MEMMODEL_CHECK
51914 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51916 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51917 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51919 #ifdef HAVE_AS_TLS
51920 #undef TARGET_HAVE_TLS
51921 #define TARGET_HAVE_TLS true
51922 #endif
51923 #undef TARGET_CANNOT_FORCE_CONST_MEM
51924 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51925 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51926 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51928 #undef TARGET_DELEGITIMIZE_ADDRESS
51929 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51931 #undef TARGET_MS_BITFIELD_LAYOUT_P
51932 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51934 #if TARGET_MACHO
51935 #undef TARGET_BINDS_LOCAL_P
51936 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51937 #else
51938 #undef TARGET_BINDS_LOCAL_P
51939 #define TARGET_BINDS_LOCAL_P default_binds_local_p_2
51940 #endif
51941 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51942 #undef TARGET_BINDS_LOCAL_P
51943 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51944 #endif
51946 #undef TARGET_ASM_OUTPUT_MI_THUNK
51947 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51948 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51949 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51951 #undef TARGET_ASM_FILE_START
51952 #define TARGET_ASM_FILE_START x86_file_start
51954 #undef TARGET_OPTION_OVERRIDE
51955 #define TARGET_OPTION_OVERRIDE ix86_option_override
51957 #undef TARGET_REGISTER_MOVE_COST
51958 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51959 #undef TARGET_MEMORY_MOVE_COST
51960 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51961 #undef TARGET_RTX_COSTS
51962 #define TARGET_RTX_COSTS ix86_rtx_costs
51963 #undef TARGET_ADDRESS_COST
51964 #define TARGET_ADDRESS_COST ix86_address_cost
51966 #undef TARGET_FIXED_CONDITION_CODE_REGS
51967 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51968 #undef TARGET_CC_MODES_COMPATIBLE
51969 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
51971 #undef TARGET_MACHINE_DEPENDENT_REORG
51972 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51974 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51975 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51977 #undef TARGET_BUILD_BUILTIN_VA_LIST
51978 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51980 #undef TARGET_FOLD_BUILTIN
51981 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

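/* Argument passing and calling-convention hooks.  */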
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  ix86_libgcc_floating_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

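/* Vectorizer cost model and SIMD configuration hooks.  */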
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

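/* Per-function target option and target attribute handling.  */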
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

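/* Register allocation hooks; LRA is always used for this target.  */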
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

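/* SIMD clone hooks used for OpenMP "declare simd" functions.  */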
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

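/* Mode switching hooks used by the optimize-mode-switching pass.  */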
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

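/* Pointer Bounds Checker (Intel MPX) hooks.  */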
#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

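/* The absolute biggest alignment a type or variable may have: 512 bits,
   matching the width of the ZMM registers.  */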
#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

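/* Build the target hook vector from the definitions above.  */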
struct gcc_target targetm = TARGET_INITIALIZER;

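/* Include the generated garbage-collection root tables for this file.  */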
#include "gt-i386.h"